def minimize(self, model, original_pdb_h, excl_string_selection,
             reference_map):
    """Run geometry minimization, optionally restrained by a map.

    With ``reference_map`` set to None, ``model`` is passed to
    ``minimize_wrapper_for_ramachandran`` together with ``original_pdb_h``
    and ``excl_string_selection``, and ``self._update_model_h()`` is called
    afterwards.

    Otherwise ``minimize_wrapper_with_map`` is run against
    ``reference_map``.  When ``self.params.use_hydrogens_in_minimization``
    is set, the minimized object is ``self.model_h`` (refreshed beforehand
    with ``self._update_model_h()``) and ``self._update_model_from_model_h()``
    is called afterwards; if not, ``model`` itself is minimized and
    ``self._update_model_h()`` is called afterwards.

    Returns None.
    """
    if reference_map is None:
        minimize_wrapper_for_ramachandran(
            model=model,
            original_pdb_h=original_pdb_h,
            excl_string_selection=excl_string_selection,
            number_of_cycles=self.params.number_of_refinement_cycles,
            log=self.log,
        )
        self._update_model_h()
        return

    print("Using map as reference", file=self.log)
    self.log.flush()

    with_hydrogens = self.params.use_hydrogens_in_minimization
    if with_hydrogens:
        # Refresh self.model_h before it is read as the minimization target.
        self._update_model_h()
    minimize_wrapper_with_map(
        model=self.model_h if with_hydrogens else model,
        target_map=reference_map,
        number_of_cycles=self.params.number_of_refinement_cycles,
        cycles_to_converge=self.params.cycles_to_converge,
        log=self.log)
    if with_hydrogens:
        self._update_model_from_model_h()
    else:
        self._update_model_h()
def minimize(self, model, original_pdb_h, excl_string_selection,
             reference_map):
    """Run geometry minimization, optionally restrained by a map.

    When ``reference_map`` is given, ``model`` is minimized against it with
    ``minimize_wrapper_with_map``.  When it is None, ``model`` is passed to
    ``minimize_wrapper_for_ramachandran`` together with ``original_pdb_h``
    and ``excl_string_selection``.  Both paths run for
    ``self.params.number_of_refinement_cycles`` cycles and log to
    ``self.log``.

    Returns None.
    """
    if reference_map is not None:
        print >> self.log, "Using map as reference"
        self.log.flush()
        minimize_wrapper_with_map(
            model=model,
            target_map=reference_map,
            number_of_cycles=self.params.number_of_refinement_cycles,
            log=self.log)
        return

    minimize_wrapper_for_ramachandran(
        model=model,
        original_pdb_h=original_pdb_h,
        excl_string_selection=excl_string_selection,
        number_of_cycles=self.params.number_of_refinement_cycles,
        log=self.log,
    )
def minimize(self, hierarchy, xrs, original_pdb_h, grm,
             ncs_restraints_group_list, excl_string_selection, ss_annotation,
             reference_map):
    """Run geometry minimization, optionally restrained by a map.

    When ``reference_map`` is None, ``hierarchy``/``xrs`` are passed to
    ``minimize_wrapper_for_ramachandran``; ``grm`` is deliberately not
    forwarded on that path.  Otherwise ``minimize_wrapper_with_map`` is run
    against ``reference_map`` using the supplied ``grm``.

    The branch is selected from the ``reference_map`` argument, i.e. the
    same object that is handed over as ``target_map``, rather than from
    ``self.reference_map``, so the decision and the map actually used
    cannot disagree.

    Returns None.
    """
    if reference_map is None:
        minimize_wrapper_for_ramachandran(
            hierarchy=hierarchy,
            xrs=xrs,
            original_pdb_h=original_pdb_h,
            # anyway need to reprocess just because of reference model
            # restraints
            grm=None,
            excl_string_selection=excl_string_selection,
            number_of_cycles=self.params.number_of_refinement_cycles,
            log=self.log,
            ncs_restraints_group_list=ncs_restraints_group_list,
            ss_annotation=ss_annotation,
            mon_lib_srv=self.mon_lib_srv,
            ener_lib=self.ener_lib,
            rotamer_manager=self.rotamer_manager)
    else:
        print >> self.log, "Using map as reference"
        self.log.flush()
        minimize_wrapper_with_map(
            pdb_h=hierarchy,
            xrs=xrs,
            target_map=reference_map,
            grm=grm,
            mon_lib_srv=self.mon_lib_srv,
            rotamer_manager=self.rotamer_manager,
            ncs_restraints_group_list=ncs_restraints_group_list,
            ss_annotation=ss_annotation,
            number_of_cycles=self.params.number_of_refinement_cycles,
            log=self.log)
def substitute_ss(real_h,
                  xray_structure,
                  ss_annotation,
                  params=None,
                  grm=None,
                  use_plane_peptide_bond_restr=True,
                  fix_rotamer_outliers=True,
                  cif_objects=None,
                  log=null_out(),
                  rotamer_manager=None,
                  reference_map=None,
                  verbose=False):
    """
    Substitute secondary structure elements in real_h hierarchy with ideal
    ones _in_place_.

    Returns reference torsion proxies - the only thing that cannot be
    restored with little effort outside the procedure. Returns None without
    touching real_h when the processed params have ``enabled`` switched off.

    real_h - hierarchy to substitute secondary structure elements.
    xray_structure - needed to get crystal symmetry (to construct
        processed_pdb_file) and to call get_geometry_restraints_manager.
    ss_annotation - iotbx.pdb.annotation object. Annotations that select no
        atoms are removed from it by remove_empty_annotations().
    params - passed through process_params(); the result supplies the
        sigmas, iteration counts, skip_empty_ss_elements and
        file_name_before_regularization used below.
    grm - restraints manager. When None a new one is built via PDB
        interpretation; otherwise secondary structure restraints are set on
        the supplied one.
    reference_map - when None, reference coordinate restraints are added
        and run2() performs the minimization; otherwise
        minimize_wrapper_with_map() is used and the result is additionally
        written to "after_ss_map_min.pdb" in the working directory.
    verbose - emit extra progress messages and route minimizer output to
        log.

    Raises Sorry if any chain has more than one conformer, or if some
    annotations select no atoms and skip_empty_ss_elements is off.
    """
    if rotamer_manager is None:
        rotamer_manager = RotamerEval()
    for model in real_h.models():
        for chain in model.chains():
            if len(chain.conformers()) > 1:
                raise Sorry("Alternative conformations are not supported.")

    processed_params = process_params(params)
    if not processed_params.enabled:
        return None

    # The total is not used further down; the loop is kept so that every
    # helix is still asked for its hbond count exactly as before.
    expected_n_hbonds = 0
    ann = ss_annotation
    for h in ann.helices:
        expected_n_hbonds += h.get_n_maximum_hbonds()
    edited_h = real_h.deep_copy()
    n_atoms_in_real_h = real_h.atoms_size()
    selection_cache = real_h.atom_selection_cache()

    # check the annotation for correctness (atoms are actually in hierarchy)
    deleted_annotations = ann.remove_empty_annotations(
        hierarchy=real_h,
        asc=selection_cache)
    if not deleted_annotations.is_empty():
        if not processed_params.skip_empty_ss_elements:
            # List the offending records in the error itself so the user
            # can see which annotations do not match the model.
            error_msg = "The following secondary structure annotations result in \n"
            error_msg += "empty atom selections. They don't match the structre: \n"
            for h in deleted_annotations.helices:
                error_msg += " %s\n" % h.as_pdb_str()
            for sh in deleted_annotations.sheets:
                error_msg += " %s\n" % sh.as_pdb_str()
            raise Sorry(error_msg)
        if len(deleted_annotations.helices) > 0:
            print >> log, "Removing the following helices because there are"
            print >> log, "no corresponding atoms in the model:"
            for h in deleted_annotations.helices:
                print >> log, h.as_pdb_str()
        if len(deleted_annotations.sheets) > 0:
            print >> log, "Removing the following sheets because there are"
            print >> log, "no corresponding atoms in the model:"
            for sh in deleted_annotations.sheets:
                print >> log, sh.as_pdb_str()
    phil_str = ann.as_restraint_groups()

    # gathering initial special position atoms
    special_position_settings = crystal.special_position_settings(
        crystal_symmetry=xray_structure.crystal_symmetry())
    site_symmetry_table = special_position_settings.site_symmetry_table(
        sites_cart=real_h.atoms().extract_xyz(),
        unconditional_general_position_flags=(
            real_h.atoms().extract_occ() != 1))
    original_spi = site_symmetry_table.special_position_indices()

    # Actually idealizing SS elements
    log.write("Replacing ss-elements with ideal ones:\n")
    log.flush()
    for h in ann.helices:
        log.write(" %s\n" % h.as_pdb_str())
        log.flush()
        selstring = h.as_atom_selections()
        isel = selection_cache.iselection(selstring[0])
        all_bsel = flex.bool(n_atoms_in_real_h, False)
        all_bsel.set_selected(isel, True)
        sel_h = real_h.select(all_bsel, copy_atoms=True)
        ideal_h = get_helix(helix_class=h.helix_class,
                            pdb_hierarchy_template=sel_h,
                            rotamer_manager=rotamer_manager)
        set_xyz_carefully(dest_h=edited_h.select(all_bsel), source_h=ideal_h)
    log_indent = " "
    for sh in ann.sheets:
        sheet_text = "%s%s\n" % (log_indent, sh.as_pdb_str())
        # Indent continuation lines, then drop only the indent that was
        # appended after the final newline so the record stays terminated.
        sheet_text = sheet_text.replace("\n", "\n" + log_indent)
        log.write(sheet_text[:-len(log_indent)])
        log.flush()
        for st in sh.strands:
            selstring = st.as_atom_selections()
            isel = selection_cache.iselection(selstring)
            all_bsel = flex.bool(n_atoms_in_real_h, False)
            all_bsel.set_selected(isel, True)
            sel_h = real_h.select(all_bsel, copy_atoms=True)
            ideal_h = secondary_structure_from_sequence(
                pdb_str=beta_pdb_str,
                sequence=None,
                pdb_hierarchy_template=sel_h,
                rotamer_manager=rotamer_manager,
            )
            set_xyz_carefully(edited_h.select(all_bsel), ideal_h)

    pre_result_h = edited_h
    pre_result_h.reset_i_seq_if_necessary()
    n_atoms = real_h.atoms_size()
    helix_selection = flex.bool(n_atoms, False)
    sheet_selection = flex.bool(n_atoms, False)
    other_selection = flex.bool(n_atoms, False)
    ss_for_tors_selection = flex.bool(n_atoms, False)
    nonss_for_tors_selection = flex.bool(n_atoms, False)
    selection_cache = real_h.atom_selection_cache()
    # start with every main chain atom in other_selection; helix and sheet
    # atoms are switched off below
    isel = selection_cache.iselection("name ca or name n or name o or name c")
    other_selection.set_selected(isel, True)
    n_main_chain_atoms = other_selection.count(True)
    isel = selection_cache.iselection("name ca or name n or name o or name c")
    nonss_for_tors_selection.set_selected(isel, True)
    main_chain_selection_prefix = "(name ca or name n or name o or name c) %s"

    print >> log, "Preparing selections..."
    log.flush()
    # Here we are just preparing selections
    for h in ann.helices:
        ss_sels = h.as_atom_selections()[0]
        selstring = main_chain_selection_prefix % ss_sels
        isel = selection_cache.iselection(selstring)
        helix_selection.set_selected(isel, True)
        other_selection.set_selected(isel, False)
        isel = selection_cache.iselection(selstring)
        ss_for_tors_selection.set_selected(isel, True)
        nonss_for_tors_selection.set_selected(isel, False)

    for sheet in ann.sheets:
        for ss_sels in sheet.as_atom_selections():
            selstring = main_chain_selection_prefix % ss_sels
            isel = selection_cache.iselection(selstring)
            sheet_selection.set_selected(isel, True)
            other_selection.set_selected(isel, False)
            isel = selection_cache.iselection(selstring)
            ss_for_tors_selection.set_selected(isel, True)
            nonss_for_tors_selection.set_selected(isel, False)
    isel = selection_cache.iselection(
        "not name ca and not name n and not name o and not name c")
    other_selection.set_selected(isel, False)
    # An atom annotated as both helix and sheet is treated as helix only.
    helix_sheet_intersection = helix_selection & sheet_selection
    if helix_sheet_intersection.count(True) > 0:
        sheet_selection = sheet_selection & ~helix_sheet_intersection
    assert ((helix_selection | sheet_selection) & other_selection).count(True)==0

    from mmtbx.monomer_library.pdb_interpretation import grand_master_phil_str
    params_line = grand_master_phil_str
    params_line += "secondary_structure {%s}" % secondary_structure.sec_str_master_phil_str
    params = iotbx.phil.parse(input_string=params_line, process_includes=True)
    # Extracting the params and setting the values on the python object did
    # not work the same way for a strange reason (the number of resulting
    # hbonds was different later). Need to investigate. Hence the phil text
    # override below.
    if grm is None:
        custom_par_text = "\n".join([
            "pdb_interpretation.secondary_structure {protein.remove_outliers = False\n%s}" \
                % phil_str,
            "pdb_interpretation.peptide_link.ramachandran_restraints = True",
            "c_beta_restraints = True",
            "pdb_interpretation.secondary_structure.enabled=True",
            "pdb_interpretation.clash_guard.nonbonded_distance_threshold=None",
            "pdb_interpretation.max_reasonable_bond_distance=None",
            # "pdb_interpretation.nonbonded_weight=500",
            "pdb_interpretation.peptide_link.oldfield.weight_scale=3",
            "pdb_interpretation.peptide_link.oldfield.plot_cutoff=0.03",
            "pdb_interpretation.peptide_link.omega_esd_override_value=3",
            "pdb_interpretation.peptide_link.apply_all_trans=True",
        ])
        if use_plane_peptide_bond_restr:
            custom_par_text += "\npdb_interpretation.peptide_link.apply_peptide_plane=True"
        custom_pars = params.fetch(
            source=iotbx.phil.parse(custom_par_text)).extract()
        params = custom_pars

        import mmtbx.utils
        processed_pdb_files_srv = mmtbx.utils.process_pdb_file_srv(
            crystal_symmetry=xray_structure.crystal_symmetry(),
            pdb_interpretation_params=params.pdb_interpretation,
            log=null_out(),
            cif_objects=cif_objects)
        if verbose:
            print >> log, "Processing file..."
            log.flush()
        processed_pdb_file, _ = processed_pdb_files_srv.process_pdb_files(
            raw_records=flex.split_lines(real_h.as_pdb_string()))

        grm = get_geometry_restraints_manager(
            processed_pdb_file, xray_structure)
    else:
        # NOTE(review): this flips remove_outliers on the object referenced
        # by secondary_structure.default_params itself, not on a copy -
        # confirm that sharing the change is intended.
        ss_params = secondary_structure.default_params
        ss_params.secondary_structure.protein.remove_outliers=False
        ss_manager = secondary_structure.manager(
            pdb_hierarchy=real_h,
            geometry_restraints_manager=grm.geometry,
            sec_str_from_pdb_file=ss_annotation,
            params=ss_params.secondary_structure,
            mon_lib_srv=None,
            verbose=-1,
            log=log)
        grm.geometry.set_secondary_structure_restraints(
            ss_manager=ss_manager,
            hierarchy=real_h,
            log=log)
    real_h.reset_i_seq_if_necessary()
    from mmtbx.geometry_restraints import reference
    if reference_map is None:
        # Without a map, the main chain is tethered to its current
        # coordinates, with a separate sigma for helix, sheet and the rest.
        if verbose:
            print >> log, "Adding reference coordinate restraints..."
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart=real_h.atoms().extract_xyz().select(helix_selection),
                selection=helix_selection,
                sigma=processed_params.sigma_on_reference_helix))
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart=real_h.atoms().extract_xyz().select(sheet_selection),
                selection=sheet_selection,
                sigma=processed_params.sigma_on_reference_sheet))
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart=real_h.atoms().extract_xyz().select(other_selection),
                selection=other_selection,
                sigma=processed_params.sigma_on_reference_non_ss))
    if verbose:
        print >> log, "Adding chi torsion restraints..."
    # Torsion targets: idealized coordinates for SS atoms, original
    # coordinates for everything else on the main chain.
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy=pre_result_h,
        sites_cart=pre_result_h.atoms().extract_xyz().\
            select(ss_for_tors_selection),
        selection=ss_for_tors_selection,
        chi_angles_only=False,
        sigma=processed_params.sigma_on_torsion_ss)
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy=pre_result_h,
        sites_cart=real_h.atoms().extract_xyz().\
            select(nonss_for_tors_selection),
        selection=nonss_for_tors_selection,
        chi_angles_only=False,
        sigma=processed_params.sigma_on_torsion_nonss)

    real_h.atoms().set_xyz(pre_result_h.atoms().extract_xyz())
    #
    # Check and correct for special positions
    #
    special_position_settings = crystal.special_position_settings(
        crystal_symmetry=xray_structure.crystal_symmetry())
    site_symmetry_table = special_position_settings.site_symmetry_table(
        sites_cart=real_h.atoms().extract_xyz(),
        unconditional_general_position_flags=(
            real_h.atoms().extract_occ() != 1))
    spi = site_symmetry_table.special_position_indices()
    if spi.size() > 0:
        print >> log, "Moving atoms from special positions:"
        for spi_i in spi:
            # Only atoms that landed on a special position because of the
            # substitution are shifted; pre-existing ones are left alone.
            if spi_i not in original_spi:
                new_coords = (
                    real_h.atoms()[spi_i].xyz[0]+0.2,
                    real_h.atoms()[spi_i].xyz[1]+0.2,
                    real_h.atoms()[spi_i].xyz[2]+0.2)
                print >> log, " ", real_h.atoms()[spi_i].id_str(),
                print >> log, tuple(real_h.atoms()[spi_i].xyz), "-->", new_coords
                real_h.atoms()[spi_i].set_xyz(new_coords)

    if processed_params.file_name_before_regularization is not None:
        grm.geometry.pair_proxies(sites_cart=real_h.atoms().extract_xyz())
        if grm.geometry.ramachandran_manager is not None:
            grm.geometry.ramachandran_manager.update_phi_psi_targets(
                sites_cart=real_h.atoms().extract_xyz())
        print >> log, "Outputting model before regularization %s" % processed_params.file_name_before_regularization
        real_h.write_pdb_file(
            file_name=processed_params.file_name_before_regularization)
        # The .geo name is derived by cutting the last four characters of
        # the model file name.
        geo_fname = processed_params.file_name_before_regularization[:-4]+'.geo'
        print >> log, "Outputting geo file for regularization %s" % geo_fname
        grm.write_geo_file(
            site_labels=[atom.id_str() for atom in real_h.atoms()],
            file_name=geo_fname)

    # testing number of restraints
    assert grm.geometry.get_n_den_proxies() == 0
    if reference_map is None:
        assert grm.geometry.get_n_reference_coordinate_proxies() == n_main_chain_atoms
    refinement_log = null_out()
    log.write(
        "Refining geometry of substituted secondary structure elements...")
    log.flush()
    if verbose:
        refinement_log = log
    from mmtbx.refinement.geometry_minimization import run2
    if reference_map is None:
        run2(
            restraints_manager=grm,
            pdb_hierarchy=real_h,
            correct_special_position_tolerance=1.0,
            max_number_of_iterations=processed_params.n_iter,
            number_of_macro_cycles=processed_params.n_macro,
            bond=True,
            nonbonded=True,
            angle=True,
            dihedral=True,
            chirality=True,
            planarity=True,
            fix_rotamer_outliers=fix_rotamer_outliers,
            log=refinement_log)
    else:
        ref_xrs = real_h.extract_xray_structure(
            crystal_symmetry=xray_structure.crystal_symmetry())
        minimize_wrapper_with_map(
            pdb_h=real_h,
            xrs=ref_xrs,
            target_map=reference_map,
            grm=grm,
            ncs_restraints_group_list=[],
            mon_lib_srv=None,
            ss_annotation=ss_annotation,
            refine_ncs_operators=False,
            number_of_cycles=processed_params.n_macro,
            log=log)
        real_h.write_pdb_file("after_ss_map_min.pdb")
    log.write(" Done\n")
    log.flush()
    return grm.geometry.get_chi_torsion_proxies()
def __init__(self, pdb_hierarchy, params=None, secondary_structure_annotation=None, reference_map=None, crystal_symmetry=None, grm=None, rama_manager=None, rotamer_manager=None, log=null_out(), verbose=False, tried_rama_angles={}, tried_final_rama_angles={}, n_run=0): if len(pdb_hierarchy.models()) > 1: raise Sorry("Multi-model files are not supported") self.original_pdb_h = pdb_hierarchy self.secondary_structure_annotation=secondary_structure_annotation asc = pdb_hierarchy.atom_selection_cache() self.xrs = pdb_hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry) self.reference_map = reference_map self.resulting_pdb_h = pdb_hierarchy.deep_copy() self.resulting_pdb_h.reset_atom_i_seqs() self.params = self.process_params(params) self.log = log self.verbose = verbose self.grm = grm self.r = rama_manager self.ideal_res_dict = idealized_aa.residue_dict() self.n_run = n_run if self.r is None: self.r = rama_eval() self.rotamer_manager = rotamer_manager if self.rotamer_manager is None: self.rotamer_manager = RotamerEval() ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy) self.p_initial_rama_outliers = ram.out_percent self.p_before_minimization_rama_outliers = None self.p_after_minimiaztion_rama_outliers = None n_inputs = [reference_map, crystal_symmetry].count(None) if not (n_inputs == 0 or n_inputs == 2): print >> log, "Need to have both map and symmetry info. Not using map." self.reference_map = None # here we are recording what CCD solutions were used to fix particular # outliers to not use the same in the next CCD try. # Nested dict. First level: # key: chain id, value: dict # key: resid (string), value: list of tried variants. 
self.tried_rama_angles = tried_rama_angles self.tried_final_rama_angles = tried_final_rama_angles berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n") self.berkeley_p_before_minimization_rama_outliers = \ berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100 n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h) self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers self.ref_exclusion_selection = "" self.number_of_ccd_trials = 0 # print "logic expr outcome:", (self.number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001) # print self.number_of_ccd_trials < 10 # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers if (self.berkeley_p_before_minimization_rama_outliers <= 0.001 and (n_bad_omegas<1 and self.params.make_all_trans)): print >> self.log, "No ramachandran outliers, skipping CCD step." print "n_bad_omegas", n_bad_omegas print "self.params.make_all_trans",self.params.make_all_trans if not self.params.enabled: print >> self.log, "Loop idealization is not enabled, use 'enabled=True'." 
while (self.number_of_ccd_trials < self.params.number_of_ccd_trials and (self.berkeley_p_after_minimiaztion_rama_outliers > 0.001 or (n_bad_omegas>=1 and self.params.make_all_trans)) and self.params.enabled): print >> self.log, "CCD try number, outliers:", self.number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers processed_chain_ids = [] for chain in self.resulting_pdb_h.only_model().chains(): if chain.id not in self.tried_rama_angles.keys(): self.tried_rama_angles[chain.id] = {} if chain.id not in self.tried_final_rama_angles.keys(): self.tried_final_rama_angles[chain.id] = {} print >> self.log, "Idealizing chain %s" % chain.id if chain.id not in processed_chain_ids: processed_chain_ids.append(chain.id) else: continue selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id sel = asc.selection("chain %s" % chain.id) chain_h = self.resulting_pdb_h.select(sel) m = chain_h.only_model() i = 0 cutted_chain_h = None for c in m.chains(): if i == 0: cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c) else: print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed." 
print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues())) m.remove_chain(c) i += 1 exclusions, ch_h = self.idealize_chain( hierarchy=(cutted_chain_h if cutted_chain_h else chain_h), tried_rama_angles_for_chain=self.tried_rama_angles[chain.id], tried_final_rama_angles_for_chain=self.tried_final_rama_angles[chain.id]) if ch_h is not None: set_xyz_smart( # dest_h=self.resulting_pdb_h, dest_h=chain, source_h=ch_h) for resnum in exclusions: selection += " and not resseq %s" % resnum self.ref_exclusion_selection += "(%s) or " % selection print "self.tried_rama_angles", self.tried_rama_angles print "self.tried_final_rama_angles", self.tried_final_rama_angles # # dumping and reloading hierarchy to do proper rounding of coordinates self.resulting_pdb_h = iotbx.pdb.input( source_info=None, lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy() berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n") self.berkeley_p_before_minimization_rama_outliers = \ berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100 if len(self.ref_exclusion_selection) > 0: self.ref_exclusion_selection = self.ref_exclusion_selection[:-3] ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h) self.p_before_minimization_rama_outliers = ram.out_percent duke_count = ram.get_outliers_count_and_fraction()[0] if berkeley_count != duke_count: print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count self.resulting_pdb_h.write_pdb_file(file_name="%d%s_discrepancy.pdb" % (self.number_of_ccd_trials, self.params.output_prefix)) if self.params.debug: self.resulting_pdb_h.write_pdb_file( file_name="%d%s_all_not_minized.pdb" % (self.number_of_ccd_trials, self.params.output_prefix)) if self.params.minimize_whole: print >> self.log, "minimizing whole chain..." 
print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection # print >> sel # XXX but first let's check and fix rotamers... print >> self.log, "Fixing/checking rotamers in loop idealization..." excl_sel = self.ref_exclusion_selection if len(excl_sel) == 0: excl_sel = None non_outliers_for_check = asc.selection("(%s)" % self.ref_exclusion_selection) pre_result_h = mmtbx.utils.fix_rotamer_outliers( pdb_hierarchy=self.resulting_pdb_h, grm=self.grm.geometry, xrs=self.xrs, map_data=self.reference_map, radius=5, mon_lib_srv=None, rotamer_manager=self.rotamer_manager, backrub_range=None, # don't sample backrub at this point non_outliers_to_check=non_outliers_for_check, # bool selection asc=asc, verbose=True, log=self.log) if self.reference_map is None: minimize_wrapper_for_ramachandran( hierarchy=self.resulting_pdb_h, xrs=self.xrs, original_pdb_h=self.original_pdb_h, excl_string_selection=self.ref_exclusion_selection, grm=self.grm, log=None, ss_annotation=self.secondary_structure_annotation) else: mwwm = minimize_wrapper_with_map( pdb_h=self.resulting_pdb_h, xrs=self.xrs, target_map=self.reference_map, grm=self.grm, ss_annotation=self.secondary_structure_annotation, number_of_cycles=Auto, log=self.log) if self.params.debug: self.resulting_pdb_h.write_pdb_file( file_name="%d%s_all_minized.pdb" % (self.number_of_ccd_trials, self.params.output_prefix)) ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h) self.p_after_minimiaztion_rama_outliers = ram.out_percent berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n") duke_count = ram.get_outliers_count_and_fraction()[0] n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h) self.berkeley_p_after_minimiaztion_rama_outliers = \ berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100 if berkeley_count != duke_count: print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count else: print >> self.log, "Number of Rama outliers 
after min:", berkeley_count print >> self.log, "Number of bad omegas:", n_bad_omegas self.number_of_ccd_trials += 1
def fix_rama_outlier(self, pdb_hierarchy, out_res_num_list, prefix="", minimize=True, ss_annotation=None, tried_rama_angles_for_chain={}, tried_final_rama_angles_for_chain={}): def comb_pair_in_bad_pairs(comb_pair, bad_pairs): if None in comb_pair: return False all_combs = [comb_pair] all_combs.append((comb_pair[0]-20, comb_pair[1])) all_combs.append((comb_pair[0]+20, comb_pair[1])) all_combs.append((comb_pair[0], comb_pair[1]-20)) all_combs.append((comb_pair[0], comb_pair[1]+20)) all_c_adj = [] for p in all_combs: new_p = p if p[0] > 180: new_p = (p[0]-180, p[1]) if p[0] < -180: new_p = (p[0]+180, p[1]) if p[1] > 180: new_p = (p[0], p[1]-180) if p[0] < -180: new_p = (p[0], p[1]+180) all_c_adj.append(new_p) for p in all_c_adj: if p in bad_pairs: return True return False original_pdb_h = pdb_hierarchy.deep_copy() original_pdb_h.reset_atom_i_seqs() original_pdb_h_asc = original_pdb_h.atom_selection_cache() chain_id = original_pdb_h.only_model().only_chain().id all_results = [] # only forward # variants_searches = [ # #ccd_radius, change_all, change_radius, direction_forward # ((1, False, 0, True ),1), # # ((1, False, 0, False),1), # ((2, False, 0, True ),1), # # ((2, False, 0, False),1), # ((3, False, 0, True ),2), # # ((3, False, 0, False),2), # ((2, True, 1, True ),1), # # ((2, True, 1, False),1), # ((3, True, 1, True ),2), # # ((3, True, 1, False),2), # ((3, True, 2, True ),3), # # ((3, True, 2, False),3), # ] # only backward # variants_searches = [ # #ccd_radius, change_all, change_radius, direction_forward # # ((1, False, 0, True ),1), # ((1, False, 0, False),1), # # ((2, False, 0, True ),1), # ((2, False, 0, False),1), # # ((3, False, 0, True ),2), # ((3, False, 0, False),2), # # ((2, True, 1, True ),1), # ((2, True, 1, False),1), # # ((3, True, 1, True ),2), # ((3, True, 1, False),2), # # ((3, True, 2, True ),3), # ((3, True, 2, False),3), # ] # both variants_searches = [ #ccd_radius, change_all, change_radius, direction_forward ((1, False, 0, True ),1), ((1, 
False, 0, False),1), ((2, False, 0, True ),1), ((2, False, 0, False),1), ((3, False, 0, True ),2), ((3, False, 0, False),2), ((2, True, 1, True ),1), ((2, True, 1, False),1), ((3, True, 1, True ),2), ((3, True, 1, False),2), ((3, True, 2, True ),3), ((3, True, 2, False),3), ] decided_variants = [] for variant, level in variants_searches: if level <= self.params.variant_search_level: decided_variants.append(variant) for ccd_radius, change_all, change_radius, direction_forward in decided_variants: # while ccd_radius <= 3: fixing_omega = False print >> self.log, " Starting optimization with radius=%d, " % ccd_radius, print >> self.log, "change_all=%s, change_radius=%d, " % (change_all, change_radius), print >> self.log, "direction=forward" if direction_forward else "direction=backwards" self.log.flush() # (moving_h, moving_ref_atoms_iseqs, fixed_ref_atoms, m_selection, contains_ss_element, anchor_present) = get_fixed_moving_parts( pdb_hierarchy=pdb_hierarchy, out_res_num_list=out_res_num_list, # n_following=1, # n_previous=ccd_radius+ccd_radius-1, n_following=ccd_radius, n_previous=ccd_radius, ss_annotation=ss_annotation, direction_forward=direction_forward, log=self.log) # print " moving_ref_atoms_iseqs", moving_ref_atoms_iseqs print " moving_h resseqs:", [x.resseq for x in moving_h.residue_groups()] moving_h_set = [] all_angles_combination_f = starting_conformations.get_all_starting_conformations( moving_h, change_radius, n_outliers=len(out_res_num_list), direction_forward=direction_forward, cutoff=self.params.variant_number_cutoff, change_all=change_all, # log=self.log, check_omega=self.params.make_all_trans, ) # # print "len(all_angles_combination_f)", len(all_angles_combination_f) if len(all_angles_combination_f) == 0: print "In starting conformations - outlier was fixed?" # return result else: # here we should filter first ones that in # tried_rama_angles_for_chain filter_out = [] # [[tried values],[tried values],...] 
for three in generate_protein_threes( hierarchy=moving_h, geometry=None): if three[1].resseq in tried_rama_angles_for_chain.keys(): filter_out.append(tried_rama_angles_for_chain[three[1].resseq]) else: filter_out.append((None, None)) ff_all_angles = [] print "filter_out", filter_out for comb in all_angles_combination_f: good = True for comb_pair, bad_pairs in zip(comb, filter_out): if bad_pairs == (None, None): continue # print "comb_pair, bad_pairs", comb_pair, bad_pairs # if comb_pair in bad_pairs: if comb_pair_in_bad_pairs(comb_pair, bad_pairs): good = False # print " Rejecting comb_pair", comb_pair break if good: ff_all_angles.append(comb) print "len(all_angles_combination_f)", len(all_angles_combination_f) print "len(ff_all_angles)", len(ff_all_angles) n_added = 0 n_all_combination = len(ff_all_angles) if n_all_combination == 0: print >> self.log, "Strange - got 0 combinations." i_max = min(self.params.variant_number_cutoff, n_all_combination) # assert i_max > 0 step = 0 if i_max > 1: step = float(n_all_combination-1)/float(i_max-1) if step < 1: step = 1 for i in range(i_max): comb = ff_all_angles[int(round(step*i))] setted_h, fixed_omega = starting_conformations.set_rama_angles( moving_h, list(comb), direction_forward=direction_forward, check_omega=self.params.make_all_trans) fixing_omega = fixing_omega or fixed_omega moving_h_set.append(setted_h) # print >> self.log, "Model %d, angles:" % i, comb if self.params.make_all_trans and utils.n_bad_omegas(moving_h_set[-1]) != 0: print "Model_%d_angles_%s.pdb" % (i, comb), print "got ", utils.n_bad_omegas(moving_h_set[-1]), "bad omegas" moving_h_set[-1].write_pdb_file("Model_%d_angles_%s.pdb" % (i, comb)) utils.list_omega(moving_h_set[-1], self.log) assert 0 if len(moving_h_set) == 0: # outlier was fixed before somehow... 
# or there's a bug in get_starting_conformations print >> self.log, "outlier was fixed before somehow" return original_pdb_h print "self.tried_rama_angles inside", self.tried_rama_angles print "tried_rama_angles_for_chain", tried_rama_angles_for_chain print "checking values", ccd_radius, change_all, change_radius, direction_forward for i, h in enumerate(moving_h_set): # if [x in tried_rama_angles_for_chain.keys() for x in out_res_num_list].count(True) > 0: # print >> self.log, "Warning!!! make something here (check angles or so)" # print >> self.log, "Skipping nonstable solution, tried previously:", (ccd_radius, change_all, change_radius, direction_forward, i) # continue resulting_rmsd = None n_iter = 0 if anchor_present: fixed_ref_atoms_coors = [x.xyz for x in fixed_ref_atoms] # print "params to constructor", fixed_ref_atoms, h, moving_ref_atoms_iseqs # easy_pickle.dump(file_name="crash.pkl", obj=[ # fixed_ref_atoms_coors, # h, # moving_ref_atoms_iseqs, # direction_forward, # self.params.save_states]) ccd_obj = ccd_cpp(fixed_ref_atoms_coors, h, moving_ref_atoms_iseqs) ccd_obj.run(direction_forward=direction_forward, save_states=self.params.save_states) resulting_rmsd = ccd_obj.resulting_rmsd n_iter = ccd_obj.n_iter if self.params.save_states: states = ccd_obj.states states.write(file_name="%s%s_%d_%s_%d_%i_states.pdb" % (chain_id, out_res_num_list[0], ccd_radius, change_all, change_radius, i)) map_target = 0 if self.reference_map is not None: map_target = maptbx.real_space_target_simple( unit_cell = self.xrs.crystal_symmetry().unit_cell(), density_map = self.reference_map, sites_cart = h.atoms().extract_xyz()) mc_rmsd = get_main_chain_rmsd_range(moving_h, h, all_atoms=True) if self.verbose: print >> self.log, "Resulting anchor and backbone RMSDs, mapcc, n_iter for model %d:" % i, print >> self.log, resulting_rmsd, ",", mc_rmsd, ",", map_target, ",", n_iter self.log.flush() # # setting new coordinates # moved_with_side_chains_h = pdb_hierarchy.deep_copy() # setting 
xyz # for i_source, i_dest in enumerate(m_selection): moved_with_side_chains_h.atoms()[i_dest].set_xyz(h.atoms()[i_source].xyz) # set_xyz_smart( # dest_h=moved_with_side_chains_h, # source_h=h) # # placing side-chains # # moved_with_side_chains_h.write_pdb_file( # file_name="%s_before_sc_placement_%d.pdb" % (prefix, i)) placing_range = get_res_nums_around(moved_with_side_chains_h, center_resnum_list=out_res_num_list, n_following=ccd_radius, n_previous=ccd_radius, include_intermediate=True, avoid_ss_annot=ss_annotation) place_side_chains(moved_with_side_chains_h, original_pdb_h, original_pdb_h_asc, self.rotamer_manager, placing_range, self.ideal_res_dict) # moved_with_side_chains_h.write_pdb_file( # file_name="%s_after_sc_placement_%d.pdb" % (prefix, i)) # # finalizing with geometry_minimization # # determining angles of interest # print "Recording picked angle for outliers" threes = generate_protein_threes( # hierarchy=moving_h, hierarchy=h, geometry=None) start_angles = [] final_angles = [] for angle_pair, three in zip(ff_all_angles[int(round(step*i))], threes): # print "three[1].resseq in out_res_num_list, angle_pair", three[1].resseq, out_res_num_list, angle_pair if three[1].resseq in out_res_num_list: # if three[1].resseq not in tried_rama_angles_for_chain.keys(): # tried_rama_angles_for_chain[three[1].resseq] = [] start_angles.append((three[1].resseq, angle_pair)) ps_angles = three.get_phi_psi_angles() final_angles.append((three[1].resseq, tuple(ps_angles))) # tried_rama_angles_for_chain[three[1].resseq].append(angle_pair) # print >> self.log, "Ended up with", three[1].resseq, "%.1f %.1f" % (ps_angles[0], ps_angles[1]) # print "Updated tried_rama_angles_for_chain:", tried_rama_angles_for_chain if (not self.ccd_solution_is_duplicated( final_angles=final_angles, tried_final_rama_angles_for_chain=tried_final_rama_angles_for_chain)): all_results.append((moved_with_side_chains_h.deep_copy(), mc_rmsd, resulting_rmsd, map_target, n_iter)) else: continue if 
self.ccd_solution_is_ok( anchor_rmsd=resulting_rmsd, mc_rmsd=mc_rmsd, n_outliers=len(out_res_num_list), ccd_radius=ccd_radius, change_all_angles=change_all, change_radius=change_radius, contains_ss_element=contains_ss_element, fixing_omega=fixing_omega): print "Choosen result (mc_rmsd, anchor_rmsd, map_target, n_iter):", mc_rmsd, resulting_rmsd, map_target, n_iter # Save to tried_ccds for rn, angles in start_angles: if rn not in tried_rama_angles_for_chain.keys(): tried_rama_angles_for_chain[rn] = [] tried_rama_angles_for_chain[rn].append(angles) # Save final angles for rn, angles in final_angles: if rn not in tried_final_rama_angles_for_chain.keys(): tried_final_rama_angles_for_chain[rn] = [] tried_final_rama_angles_for_chain[rn].append(angles) print >> self.log, "Ended up with", final_angles print >> self.log, "Updated tried_rama_angles_for_chain:", tried_rama_angles_for_chain print >> self.log, "Updated tried_final_rama_angles_for_chain:", tried_final_rama_angles_for_chain self.log.flush() if minimize: print >> self.log, "minimizing..." 
# moved_with_side_chains_h.write_pdb_file( # file_name="%s_result_before_min_%d.pdb" % (prefix, i)) if self.reference_map is None: minimize_wrapper_for_ramachandran( hierarchy=moved_with_side_chains_h, xrs=xrs, original_pdb_h=original_pdb_h, log=self.log, grm=self.grm, ss_annotation=self.secondary_structure_annotation) else: mwwm = minimize_wrapper_with_map( pdb_h=moved_with_side_chains_h, xrs=xrs, target_map=self.reference_map, grm=self.grm, ss_annotation=self.secondary_structure_annotation, log=self.log) # moved_with_side_chains_h.write_pdb_file( # file_name="%s_result_minimized_%d.pdb" % (prefix, i)) final_rmsd = get_main_chain_rmsd_range(moved_with_side_chains_h, original_pdb_h, placing_range) print >> self.log, "FINAL RMSD after minimization:", final_rmsd return moved_with_side_chains_h all_results.sort(key=lambda tup: tup[1]) if self.verbose: print >> self.log, "ALL RESULTS:" i = 0 for ar in all_results: print >> self.log, ar[1:], if ar[2] < 0.4: # fn = "variant_%d.pdb" % i # ar[0].write_pdb_file(file_name=fn) # print fn i += 1 else: print >> self.log, " no output" if self.params.force_rama_fixes: # find and apply the best varian from all_results. This would be the one # with the smallest rmsd given satisfactory closure print >> self.log, "Applying the best found variant:", i = 0 while i < len(all_results) and all_results[i][2] > 1.5: i += 1 # apply # === duplication!!!! if i < len(all_results): print >> self.log, all_results[i][1:] if minimize: print >> self.log, "minimizing..." 
# all_results[i][0].write_pdb_file( # file_name="%s_result_before_min_%d.pdb" % (prefix, i)) if self.reference_map is None: minimize_wrapper_for_ramachandran( hierarchy=all_results[i][0], xrs=xrs, original_pdb_h=original_pdb_h, log=self.log, grm=self.grm, ss_annotation=self.secondary_structure_annotation) else: mwwm = minimize_wrapper_with_map( pdb_h=all_results[i][0], xrs=xrs, target_map=self.reference_map, grm=self.grm, ss_annotation=self.secondary_structure_annotation, log=self.log) # all_results[i][0].write_pdb_file( # file_name="%s_result_minimized_%d.pdb" % (prefix, i)) final_rmsd = get_main_chain_rmsd_range(all_results[i][0], original_pdb_h, placing_range) print >> self.log, "FINAL RMSD after minimization:", final_rmsd return all_results[i][0] else: print >> self.log, " NOT FOUND!" for i in all_results: print >> self.log, i[1:] # === end of duplication!!!! else: print >> self.log, "Epic FAIL: failed to fix rama outlier:", out_res_num_list print >> self.log, " Options were: (mc_rmsd, resultign_rmsd, n_iter)" for i in all_results: print >> self.log, i[1:] return original_pdb_h
def substitute_ss(
    model, # changed in place
    params = None,
    use_plane_peptide_bond_restr=True,
    fix_rotamer_outliers=True,
    log=null_out(),
    check_rotamer_clashes=True,
    reference_map=None,
    verbose=False):
  """
  Substitute secondary structure elements of the model with ideal ones
  _in_place_ and regularize the result.

  The secondary structure annotation is taken from
  model.get_ss_annotation(). Returns the chi torsion proxies of the
  restraints manager - the only thing that cannot be restored with little
  effort outside the procedure. Returns None when the procedure is
  disabled in params, when the model has no annotation, or when no element
  was idealized.

  Parameters:
    model - model object; coordinates and restraints are modified.
    params - passed to process_params().
    use_plane_peptide_bond_restr, fix_rotamer_outliers - not used by the
        current code (kept for callers).
    log - output stream.
    check_rotamer_clashes - run mmtbx.utils.fix_rotamer_outliers on the
        idealized elements. Also writes "before_rotamers.pdb" into the
        current directory.
    reference_map - if given, minimization is done against the map and no
        reference coordinate restraints are added.
    verbose - send restraint/minimization output to log.

  Raises Sorry for multi-model input, for alternative conformations, and
  for annotations with empty atom selections unless
  skip_empty_ss_elements is set in params.
  """
  import mmtbx.utils

  def get_special_position_indices():
    # Indices of atoms that currently sit on special positions; atoms with
    # occupancy != 1 are unconditionally treated as general positions.
    special_position_settings = crystal.special_position_settings(
        crystal_symmetry = model.crystal_symmetry())
    site_symmetry_table = \
        special_position_settings.site_symmetry_table(
          sites_cart = model.get_sites_cart(),
          unconditional_general_position_flags=(
            model.get_atoms().extract_occ() != 1))
    return site_symmetry_table.special_position_indices()

  ss_annotation = model.get_ss_annotation()
  if model.get_hierarchy().models_size() > 1:
    raise Sorry("Multi model files are not supported")
  for m in model.get_hierarchy().models():
    for chain in m.chains():
      if len(chain.conformers()) > 1:
        raise Sorry("Alternative conformations are not supported.")

  processed_params = process_params(params)
  if not processed_params.enabled:
    return None
  if ss_annotation is None:
    return None

  ann = ss_annotation
  if model.ncs_constraints_present():
    print >> log, "Using master NCS to reduce amount of work"

  # NOTE(review): expected_n_hbonds is computed but never used below.
  expected_n_hbonds = 0
  for h in ann.helices:
    expected_n_hbonds += h.get_n_maximum_hbonds()
  edited_h = model.get_hierarchy().deep_copy()
  n_atoms_in_real_h = model.get_number_of_atoms()
  selection_cache = model.get_atom_selection_cache()

  # check the annotation for correctness (atoms are actually in hierarchy)
  deleted_annotations = ann.remove_empty_annotations(
      hierarchy=model.get_hierarchy(),
      asc=selection_cache)
  if not deleted_annotations.is_empty():
    if processed_params.skip_empty_ss_elements:
      if len(deleted_annotations.helices) > 0:
        print >> log, "Removing the following helices because there are"
        print >> log, "no corresponding atoms in the model:"
        for h in deleted_annotations.helices:
          print >> log, h.as_pdb_str()
      if len(deleted_annotations.sheets) > 0:
        print >> log, "Removing the following sheets because there are"
        print >> log, "no corresponding atoms in the model:"
        for sh in deleted_annotations.sheets:
          print >> log, sh.as_pdb_str()
    else:
      # List the offending records in the error itself: this is the only
      # place where the message is shown to the user.
      error_msg = "The following secondary structure annotations result in \n"
      error_msg +="empty atom selections. They don't match the structre: \n"
      for h in deleted_annotations.helices:
        error_msg += " %s\n" % h.as_pdb_str()
      for sh in deleted_annotations.sheets:
        error_msg += " %s\n" % sh.as_pdb_str()
      raise Sorry(error_msg)
  # NOTE(review): phil_str is not used below.
  phil_str = ann.as_restraint_groups()

  # gathering initial special position atoms
  original_spi = get_special_position_indices()

  # Actually idealizing SS elements
  fixed_ss_selection = flex.bool(n_atoms_in_real_h, False)
  log.write("Replacing ss-elements with ideal ones:\n")
  log.flush()
  ss_stats = gather_ss_stats(pdb_h=model.get_hierarchy())
  n_idealized_elements = 0
  # Normalize the NCS master selection to a bool array over all atoms;
  # without a master selection every atom counts as master.
  master_bool_sel = model.get_master_selection()
  if master_bool_sel is None or master_bool_sel.size() == 0:
    master_bool_sel = flex.bool(model.get_number_of_atoms(), True)
  elif isinstance(master_bool_sel, flex.size_t):
    master_bool_sel = flex.bool(model.get_number_of_atoms(), master_bool_sel)
  assert master_bool_sel.size() == model.get_number_of_atoms()

  for h in ann.helices:
    log.write(" %s\n" % h.as_pdb_str())
    log.flush()
    if processed_params.skip_good_ss_elements and ss_element_is_good(ss_stats, ([h],[])):
      log.write(" skipping, good element.\n")
    else:
      selstring = h.as_atom_selections()
      sel = selection_cache.selection(selstring[0])
      isel = sel.iselection()
      if (master_bool_sel & sel).iselection().size() == 0:
        log.write(" skipping, not in NCS master.\n")
        continue
      n_idealized_elements += 1
      log.write(" substitute with idealized one.\n")
      fixed_ss_selection.set_selected(isel, True)
      all_bsel = flex.bool(n_atoms_in_real_h, False)
      all_bsel.set_selected(isel, True)
      sel_h = model.get_hierarchy().select(all_bsel, copy_atoms=True)
      ideal_h = get_helix(helix_class=h.helix_class,
                          pdb_hierarchy_template=sel_h,
                          rotamer_manager=model.get_rotamer_manager())
      set_xyz_carefully(dest_h=edited_h.select(all_bsel), source_h=ideal_h)

  for sh in ann.sheets:
    # Indent continuation lines of a multi-line record; the slice removes
    # exactly the indent that was appended after the final newline.
    indent = " "
    sheet_txt = (" %s\n" % sh.as_pdb_str()).replace("\n", "\n" + indent)
    log.write(sheet_txt[:-len(indent)])
    log.flush()
    if processed_params.skip_good_ss_elements and ss_element_is_good(ss_stats, ([],[sh])):
      log.write(" skipping, good element.\n")
    else:
      full_sh_selection = flex.bool(n_atoms_in_real_h, False)
      for st in sh.strands:
        selstring = st.as_atom_selections()
        isel = selection_cache.iselection(selstring)
        full_sh_selection.set_selected(isel, True)
      if (master_bool_sel & full_sh_selection).iselection().size() == 0:
        log.write(" skipping, not in NCS master.\n")
        continue
      n_idealized_elements += 1
      log.write(" substitute with idealized one.\n")
      for st in sh.strands:
        selstring = st.as_atom_selections()
        isel = selection_cache.iselection(selstring)
        all_bsel = flex.bool(n_atoms_in_real_h, False)
        all_bsel.set_selected(isel, True)
        fixed_ss_selection.set_selected(isel, True)
        sel_h = model.get_hierarchy().select(all_bsel, copy_atoms=True)
        ideal_h = secondary_structure_from_sequence(
            pdb_str=beta_pdb_str,
            sequence=None,
            pdb_hierarchy_template=sel_h,
            rotamer_manager=model.get_rotamer_manager(),
            )
        set_xyz_carefully(edited_h.select(all_bsel), ideal_h)

  if n_idealized_elements == 0:
    log.write("Nothing was idealized.\n")
    # Don't do geometry minimization and stuff if nothing was changed.
    return None

  # XXX here we want to adopt new coordinates
  model.set_sites_cart(sites_cart=edited_h.atoms().extract_xyz())
  if model.ncs_constraints_present():
    model.set_sites_cart_from_hierarchy(multiply_ncs=True)

  helix_selection = flex.bool(n_atoms_in_real_h, False)
  sheet_selection = flex.bool(n_atoms_in_real_h, False)
  other_selection = flex.bool(n_atoms_in_real_h, False)
  ss_for_tors_selection = flex.bool(n_atoms_in_real_h, False)
  nonss_for_tors_selection = flex.bool(n_atoms_in_real_h, False)
  # Start with all main-chain atoms in the "other"/non-SS selections; the
  # loops below move SS atoms out of them.
  isel = selection_cache.iselection("name ca or name n or name o or name c")
  other_selection.set_selected(isel, True)
  n_main_chain_atoms = other_selection.count(True)
  nonss_for_tors_selection.set_selected(isel, True)
  main_chain_selection_prefix = "(name ca or name n or name o or name c) %s"

  print >> log, "Preparing selections..."
  log.flush()
  # Here we are just preparing selections
  for h in ann.helices:
    ss_sels = h.as_atom_selections()[0]
    selstring = main_chain_selection_prefix % ss_sels
    isel = selection_cache.iselection(selstring)
    helix_selection.set_selected(isel, True)
    other_selection.set_selected(isel, False)
    ss_for_tors_selection.set_selected(isel, True)
    nonss_for_tors_selection.set_selected(isel, False)

  for sheet in ann.sheets:
    for ss_sels in sheet.as_atom_selections():
      selstring = main_chain_selection_prefix % ss_sels
      isel = selection_cache.iselection(selstring)
      sheet_selection.set_selected(isel, True)
      other_selection.set_selected(isel, False)
      ss_for_tors_selection.set_selected(isel, True)
      nonss_for_tors_selection.set_selected(isel, False)
  isel = selection_cache.iselection(
      "not name ca and not name n and not name o and not name c")
  other_selection.set_selected(isel, False)
  # An atom annotated as both helix and sheet is kept in the helix
  # selection only, so that it is restrained once.
  helix_sheet_intersection = helix_selection & sheet_selection
  if helix_sheet_intersection.count(True) > 0:
    sheet_selection = sheet_selection & ~helix_sheet_intersection
  assert ((helix_selection | sheet_selection) & other_selection).count(True)==0

  from mmtbx.monomer_library.pdb_interpretation import grand_master_phil_str
  params_line = grand_master_phil_str
  params_line += "secondary_structure {%s}" % secondary_structure.sec_str_master_phil_str
  # NOTE(review): the parsed phil object is not used below. The original
  # author noted that going through extract()/format() "does not work the
  # same way for a strange reason" (the number of resulting hbonds
  # differs); needs investigation.
  params = iotbx.phil.parse(input_string=params_line, process_includes=True)

  grm = model.get_restraints_manager()
  ssm_log = null_out()
  if verbose:
    ssm_log = log
  ss_params = secondary_structure.sec_str_master_phil.fetch().extract()
  ss_params.secondary_structure.protein.remove_outliers=False
  ss_manager = secondary_structure.manager(
      pdb_hierarchy=model.get_hierarchy(),
      geometry_restraints_manager=grm.geometry,
      sec_str_from_pdb_file=ss_annotation,
      params=ss_params.secondary_structure,
      mon_lib_srv=None,
      verbose=-1,
      log=ssm_log)
  grm.geometry.set_secondary_structure_restraints(
      ss_manager=ss_manager,
      hierarchy=model.get_hierarchy(),
      log=ssm_log)
  model.get_hierarchy().reset_i_seq_if_necessary()
  from mmtbx.geometry_restraints import reference
  if reference_map is None:
    if verbose:
      print >> log, "Adding reference coordinate restraints..."
    grm.geometry.append_reference_coordinate_restraints_in_place(
        reference.add_coordinate_restraints(
            sites_cart = model.get_sites_cart().select(helix_selection),
            selection = helix_selection,
            sigma = processed_params.sigma_on_reference_helix))
    grm.geometry.append_reference_coordinate_restraints_in_place(
        reference.add_coordinate_restraints(
            sites_cart = model.get_sites_cart().select(sheet_selection),
            selection = sheet_selection,
            sigma = processed_params.sigma_on_reference_sheet))
    grm.geometry.append_reference_coordinate_restraints_in_place(
        reference.add_coordinate_restraints(
            sites_cart = model.get_sites_cart().select(other_selection),
            selection = other_selection,
            sigma = processed_params.sigma_on_reference_non_ss))

  # XXX Somewhere here we actually should check placed side-chains for
  # clashes because we used ones that were in original model and just moved
  # them to nearest allowed rotamer. The idealization may affect a lot
  # the orientation of side chain thus justifying changing rotamer on it
  # to avoid clashes.
  if check_rotamer_clashes:
    print >> log, "Fixing/checking rotamers..."
    # NOTE(review): unconditional dump into the current directory; looks
    # like a debugging aid.
    br_txt = model.model_as_pdb()
    with open("before_rotamers.pdb", 'w') as f:
      f.write(br_txt)
    mmtbx.utils.fix_rotamer_outliers(
        model = model,
        map_data=reference_map,
        radius=5,
        backrub_range=None, # don't sample backrub at this point
        non_outliers_to_check=fixed_ss_selection, # bool selection
        verbose=True,
        log=log)

  if verbose:
    print >> log, "Adding chi torsion restraints..."
  # only backbone
  grm.geometry.add_chi_torsion_restraints_in_place(
      pdb_hierarchy = model.get_hierarchy(),
      sites_cart = model.get_sites_cart().select(ss_for_tors_selection),
      selection = ss_for_tors_selection,
      chi_angles_only = False,
      sigma = processed_params.sigma_on_torsion_ss)
  grm.geometry.add_chi_torsion_restraints_in_place(
      pdb_hierarchy = model.get_hierarchy(),
      sites_cart = model.get_sites_cart().select(nonss_for_tors_selection),
      selection = nonss_for_tors_selection,
      chi_angles_only = False,
      sigma = processed_params.sigma_on_torsion_nonss)

  #
  # Check and correct for special positions: atoms that ended up on a
  # special position only because of the idealization are shifted by 0.2
  # along every axis.
  #
  real_h = model.get_hierarchy() # just a shortcut here...
  spi = get_special_position_indices()
  if spi.size() > 0:
    print >> log, "Moving atoms from special positions:"
    for spi_i in spi:
      if spi_i not in original_spi:
        atom = real_h.atoms()[spi_i]
        new_coords = (
            atom.xyz[0]+0.2,
            atom.xyz[1]+0.2,
            atom.xyz[2]+0.2)
        print >> log, " ", atom.id_str(),
        print >> log, tuple(atom.xyz), "-->", new_coords
        atom.set_xyz(new_coords)
  model.set_sites_cart_from_hierarchy()

  if processed_params.file_name_before_regularization is not None:
    grm.geometry.pair_proxies(sites_cart=model.get_sites_cart())
    if grm.geometry.ramachandran_manager is not None:
      grm.geometry.ramachandran_manager.update_phi_psi_targets(
          sites_cart=model.get_sites_cart())
    print >> log, "Outputting model before regularization %s" % processed_params.file_name_before_regularization
    m_txt = model.model_as_pdb()
    g_txt = model.restraints_as_geo()
    with open(processed_params.file_name_before_regularization, 'w') as f:
      f.write(m_txt)
    # The slice assumes a 3-letter extension (e.g. ".pdb").
    geo_fname = processed_params.file_name_before_regularization[:-4]+'.geo'
    print >> log, "Outputting geo file for regularization %s" % geo_fname
    with open(geo_fname, 'w') as f:
      f.write(g_txt)

  #testing number of restraints
  assert grm.geometry.get_n_den_proxies() == 0
  if reference_map is None:
    assert grm.geometry.get_n_reference_coordinate_proxies() == n_main_chain_atoms, \
        "%d %d" % (grm.geometry.get_n_reference_coordinate_proxies(), n_main_chain_atoms)
  refinement_log = null_out()
  log.write(
      "Refining geometry of substituted secondary structure elements...")
  log.flush()
  if verbose:
    refinement_log = log
  if reference_map is None:
    minimize_wrapper_for_ramachandran(
        model = model,
        original_pdb_h = None,
        excl_string_selection = "",
        log = refinement_log,
        number_of_cycles = processed_params.n_iter)
  else:
    minimize_wrapper_with_map(
        model = model,
        target_map=reference_map,
        refine_ncs_operators=False,
        number_of_cycles=processed_params.n_macro,
        log=log)
  model.set_sites_cart_from_hierarchy()

  log.write(" Done\n")
  log.flush()
  # The reference coordinate restraints were only needed for this
  # regularization; the torsion proxies are handed back to the caller.
  grm.geometry.remove_reference_coordinate_restraints_in_place()
  return grm.geometry.get_chi_torsion_proxies()
def substitute_ss(real_h,
                  xray_structure,
                  ss_annotation,
                  params=None,
                  grm=None,
                  use_plane_peptide_bond_restr=True,
                  fix_rotamer_outliers=True,
                  cif_objects=None,
                  log=null_out(),
                  rotamer_manager=None,
                  reference_map=None,
                  verbose=False):
    """
    Substitute secondary structure elements in real_h with ideal ones
    _in_place_ and regularize the geometry of the result.

    real_h - hierarchy whose secondary structure elements are substituted.
      Its coordinates are overwritten.
    xray_structure - used for its crystal symmetry and passed to
      get_geometry_restraints_manager when grm is not supplied.
    ss_annotation - iotbx.pdb.annotation object. Annotations that select no
      atoms are removed from it by remove_empty_annotations().
    params - passed through process_params(); when the processed parameters
      have enabled == False nothing is done.
    grm - optional restraints manager. When None, one is built from real_h
      with pdb_interpretation parameters assembled here; otherwise secondary
      structure restraints are set on the supplied one.
    reference_map - when given, minimize_wrapper_with_map is used and no
      reference coordinate restraints are added; otherwise run2 is used.

    Returns None if the procedure is disabled, otherwise the chi torsion
    proxies of the restraints manager (the only thing that cannot be
    restored with little effort outside the procedure).

    Raises Sorry if a chain has alternative conformations, or if some
    annotations select no atoms and skip_empty_ss_elements is off.
    """
    if rotamer_manager is None:
        rotamer_manager = RotamerEval()
    for model in real_h.models():
        for chain in model.chains():
            if len(chain.conformers()) > 1:
                raise Sorry("Alternative conformations are not supported.")

    processed_params = process_params(params)
    if not processed_params.enabled:
        return None

    ann = ss_annotation
    edited_h = real_h.deep_copy()
    n_atoms = real_h.atoms_size()
    # real_h itself is not modified until the ideal coordinates are copied
    # back below, so a single selection cache serves the whole procedure.
    selection_cache = real_h.atom_selection_cache()

    # Check the annotation for correctness (atoms are actually in hierarchy).
    deleted_annotations = ann.remove_empty_annotations(
        hierarchy=real_h,
        asc=selection_cache)
    if not deleted_annotations.is_empty():
        if not processed_params.skip_empty_ss_elements:
            # List the offending records in the error itself; previously the
            # list was only assembled on the branch that never raised.
            error_msg = "The following secondary structure annotations result in \n"
            error_msg += "empty atom selections. They don't match the structre: \n"
            for h in deleted_annotations.helices:
                error_msg += " %s\n" % h.as_pdb_str()
            for sh in deleted_annotations.sheets:
                error_msg += " %s\n" % sh.as_pdb_str()
            raise Sorry(error_msg)
        if len(deleted_annotations.helices) > 0:
            print >> log, "Removing the following helices because there are"
            print >> log, "no corresponding atoms in the model:"
            for h in deleted_annotations.helices:
                print >> log, h.as_pdb_str()
        if len(deleted_annotations.sheets) > 0:
            print >> log, "Removing the following sheets because there are"
            print >> log, "no corresponding atoms in the model:"
            for sh in deleted_annotations.sheets:
                print >> log, sh.as_pdb_str()
    phil_str = ann.as_restraint_groups()

    # Actually idealizing SS elements: ideal coordinates go into edited_h.
    log.write("Replacing ss-elements with ideal ones:\n")
    log.flush()
    for h in ann.helices:
        log.write(" %s\n" % h.as_pdb_str())
        log.flush()
        isel = selection_cache.iselection(h.as_atom_selections()[0])
        all_bsel = flex.bool(n_atoms, False)
        all_bsel.set_selected(isel, True)
        sel_h = real_h.select(all_bsel, copy_atoms=True)
        ideal_h = get_helix(
            helix_class=h.helix_class,
            pdb_hierarchy_template=sel_h,
            rotamer_manager=rotamer_manager)
        set_xyz_carefully(dest_h=edited_h.select(all_bsel), source_h=ideal_h)
    for sh in ann.sheets:
        s = " %s\n" % sh.as_pdb_str()
        ss = s.replace("\n", "\n ")
        log.write(ss[:-2])
        log.flush()
        for st in sh.strands:
            isel = selection_cache.iselection(st.as_atom_selections())
            all_bsel = flex.bool(n_atoms, False)
            all_bsel.set_selected(isel, True)
            sel_h = real_h.select(all_bsel, copy_atoms=True)
            ideal_h = secondary_structure_from_sequence(
                pdb_str=beta_pdb_str,
                sequence=None,
                pdb_hierarchy_template=sel_h,
                rotamer_manager=rotamer_manager,
            )
            set_xyz_carefully(edited_h.select(all_bsel), ideal_h)

    pre_result_h = edited_h
    pre_result_h.reset_i_seq_if_necessary()

    # Main-chain selections: helix / sheet / everything else, plus the
    # SS / non-SS split used for the torsion restraints.
    helix_selection = flex.bool(n_atoms, False)
    sheet_selection = flex.bool(n_atoms, False)
    other_selection = flex.bool(n_atoms, False)
    ss_for_tors_selection = flex.bool(n_atoms, False)
    nonss_for_tors_selection = flex.bool(n_atoms, False)
    main_chain_isel = selection_cache.iselection(
        "name ca or name n or name o or name c")
    other_selection.set_selected(main_chain_isel, True)
    n_main_chain_atoms = other_selection.count(True)
    nonss_for_tors_selection.set_selected(main_chain_isel, True)
    main_chain_selection_prefix = "(name ca or name n or name o or name c) %s"

    print >> log, "Preparing selections..."
    log.flush()
    for h in ann.helices:
        selstring = main_chain_selection_prefix % h.as_atom_selections()[0]
        isel = selection_cache.iselection(selstring)
        helix_selection.set_selected(isel, True)
        other_selection.set_selected(isel, False)
        ss_for_tors_selection.set_selected(isel, True)
        nonss_for_tors_selection.set_selected(isel, False)
    for sheet in ann.sheets:
        for ss_sels in sheet.as_atom_selections():
            selstring = main_chain_selection_prefix % ss_sels
            isel = selection_cache.iselection(selstring)
            sheet_selection.set_selected(isel, True)
            other_selection.set_selected(isel, False)
            ss_for_tors_selection.set_selected(isel, True)
            nonss_for_tors_selection.set_selected(isel, False)
    isel = selection_cache.iselection(
        "not name ca and not name n and not name o and not name c")
    other_selection.set_selected(isel, False)
    # An atom annotated as both helix and sheet is treated as helix only, so
    # that it receives exactly one reference coordinate restraint.
    helix_sheet_intersection = helix_selection & sheet_selection
    if helix_sheet_intersection.count(True) > 0:
        sheet_selection = sheet_selection & ~helix_sheet_intersection
    assert ((helix_selection | sheet_selection) & other_selection).count(True)==0

    if grm is None:
        # The master phil is needed only on this branch, so parse it here.
        from mmtbx.monomer_library.pdb_interpretation import grand_master_phil_str
        params_line = grand_master_phil_str
        params_line += "secondary_structure {%s}" % secondary_structure.sec_str_master_phil_str
        master_phil = iotbx.phil.parse(
            input_string=params_line, process_includes=True)
        custom_par_text = "\n".join([
            "pdb_interpretation.secondary_structure {protein.remove_outliers = False\n%s}" \
                % phil_str,
            "pdb_interpretation.peptide_link.ramachandran_restraints = True",
            "c_beta_restraints = True",
            "pdb_interpretation.secondary_structure.enabled=True",
            "pdb_interpretation.clash_guard.nonbonded_distance_threshold=None",
            "pdb_interpretation.max_reasonable_bond_distance=None",
            "pdb_interpretation.peptide_link.oldfield.weight_scale=3",
            "pdb_interpretation.peptide_link.oldfield.plot_cutoff=0.03",
            "pdb_interpretation.peptide_link.omega_esd_override_value=3",
            "pdb_interpretation.peptide_link.apply_all_trans=True",
        ])
        if use_plane_peptide_bond_restr:
            custom_par_text += "\npdb_interpretation.peptide_link.apply_peptide_plane=True"
        work_params = master_phil.fetch(
            source=iotbx.phil.parse(custom_par_text)).extract()
        import mmtbx.utils
        processed_pdb_files_srv = mmtbx.utils.process_pdb_file_srv(
            crystal_symmetry=xray_structure.crystal_symmetry(),
            pdb_interpretation_params=work_params.pdb_interpretation,
            log=null_out(),
            cif_objects=cif_objects)
        if verbose:
            print >> log, "Processing file..."
            log.flush()
        processed_pdb_file, _ = processed_pdb_files_srv.process_pdb_files(
            raw_records=flex.split_lines(real_h.as_pdb_string()))
        grm = get_geometry_restraints_manager(
            processed_pdb_file, xray_structure)
    else:
        ss_manager = secondary_structure.manager(
            pdb_hierarchy=real_h,
            geometry_restraints_manager=grm.geometry,
            sec_str_from_pdb_file=ss_annotation,
            params=None,
            mon_lib_srv=None,
            verbose=-1,
            log=log)
        grm.geometry.set_secondary_structure_restraints(
            ss_manager=ss_manager,
            hierarchy=real_h,
            log=log)
    real_h.reset_i_seq_if_necessary()

    from mmtbx.geometry_restraints import reference
    if reference_map is None:
        # Without a map, main-chain atoms are tethered to their original
        # (real_h) positions with a sigma depending on the SS type.
        if verbose:
            print >> log, "Adding reference coordinate restraints..."
        original_sites = real_h.atoms().extract_xyz()
        for selection, sigma in (
                (helix_selection, processed_params.sigma_on_reference_helix),
                (sheet_selection, processed_params.sigma_on_reference_sheet),
                (other_selection, processed_params.sigma_on_reference_non_ss)):
            grm.geometry.append_reference_coordinate_restraints_in_place(
                reference.add_coordinate_restraints(
                    sites_cart=original_sites.select(selection),
                    selection=selection,
                    sigma=sigma))
    if verbose:
        print >> log, "Adding chi torsion restraints..."
    # SS torsions are restrained to the idealized coordinates, the rest of
    # the main chain to the original ones.
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy=pre_result_h,
        sites_cart=pre_result_h.atoms().extract_xyz().select(
            ss_for_tors_selection),
        selection=ss_for_tors_selection,
        chi_angles_only=False,
        sigma=processed_params.sigma_on_torsion_ss)
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy=pre_result_h,
        sites_cart=real_h.atoms().extract_xyz().select(
            nonss_for_tors_selection),
        selection=nonss_for_tors_selection,
        chi_angles_only=False,
        sigma=processed_params.sigma_on_torsion_nonss)
    # From here on real_h carries the idealized coordinates.
    real_h.atoms().set_xyz(pre_result_h.atoms().extract_xyz())

    if processed_params.file_name_before_regularization is not None:
        grm.geometry.pair_proxies(sites_cart=real_h.atoms().extract_xyz())
        if grm.geometry.ramachandran_manager is not None:
            grm.geometry.ramachandran_manager.update_phi_psi_targets(
                sites_cart=real_h.atoms().extract_xyz())
        print >> log, "Outputting model before regularization %s" % processed_params.file_name_before_regularization
        real_h.write_pdb_file(
            file_name=processed_params.file_name_before_regularization)
        # NOTE(review): assumes the file name ends with a 4-character
        # extension such as ".pdb".
        geo_fname = processed_params.file_name_before_regularization[:-4]+'.geo'
        print >> log, "Outputting geo file for regularization %s" % geo_fname
        grm.write_geo_file(
            site_labels=[atom.id_str() for atom in real_h.atoms()],
            file_name=geo_fname)

    # Testing number of restraints.
    assert grm.geometry.get_n_den_proxies() == 0
    if reference_map is None:
        assert grm.geometry.get_n_reference_coordinate_proxies() == n_main_chain_atoms

    refinement_log = null_out()
    log.write(
        "Refining geometry of substituted secondary structure elements...")
    log.flush()
    if verbose:
        refinement_log = log
    from mmtbx.refinement.geometry_minimization import run2
    if reference_map is None:
        run2(
            restraints_manager=grm,
            pdb_hierarchy=real_h,
            correct_special_position_tolerance=1.0,
            max_number_of_iterations=processed_params.n_iter,
            number_of_macro_cycles=processed_params.n_macro,
            bond=True,
            nonbonded=True,
            angle=True,
            dihedral=True,
            chirality=True,
            planarity=True,
            fix_rotamer_outliers=fix_rotamer_outliers,
            log=refinement_log)
    else:
        ref_xrs = real_h.extract_xray_structure(
            crystal_symmetry=xray_structure.crystal_symmetry())
        minimize_wrapper_with_map(
            pdb_h=real_h,
            xrs=ref_xrs,
            target_map=reference_map,
            grm=grm,
            ncs_restraints_group_list=[],
            mon_lib_srv=None,
            ss_annotation=ss_annotation,
            refine_ncs_operators=False,
            number_of_cycles=processed_params.n_macro,
            log=log)
        # NOTE(review): unconditional dump into the current directory; looks
        # like a debugging leftover - confirm before removing.
        real_h.write_pdb_file("after_ss_map_min.pdb")
    log.write(" Done\n")
    log.flush()
    return grm.geometry.get_chi_torsion_proxies()
def whole_minimization(self):
    """
    Regularize the whole model after secondary structure substitution.

    Prepares main-chain selections for helices, sheets and the remainder of
    self.model from self.ss_annotation, sets secondary structure restraints
    on the model's restraints manager, adds reference coordinate restraints
    (only when self.reference_map is None) and chi torsion restraints,
    optionally fits rotamers, nudges atoms that landed on special positions,
    stores a copy of the model in self.model_before_regularization and runs
    the minimization (with or without the map).

    Before returning, reference coordinate restraints and the chi torsion
    restraints of the non-SS main chain are removed from the restraints
    manager.

    Returns the remaining chi torsion proxies.
    """
    n_atoms = self.model.get_number_of_atoms()
    asc = self.model.get_atom_selection_cache()
    helix_selection = flex.bool(n_atoms, False)
    sheet_selection = flex.bool(n_atoms, False)
    other_selection = flex.bool(n_atoms, False)
    ss_for_tors_selection = flex.bool(n_atoms, False)
    nonss_for_tors_selection = flex.bool(n_atoms, False)

    # Start with every main-chain atom in the "other" / non-SS sets and move
    # atoms out of them as helices and sheets are processed.
    main_chain_isel = asc.iselection("name ca or name n or name o or name c")
    other_selection.set_selected(main_chain_isel, True)
    n_main_chain_atoms = other_selection.count(True)
    nonss_for_tors_selection.set_selected(main_chain_isel, True)
    main_chain_selection_prefix = "(name ca or name n or name o or name c) %s"

    print("Preparing selections...", file=self.log)
    self.log.flush()
    for h in self.ss_annotation.helices:
        selstring = main_chain_selection_prefix % h.as_atom_selections()[0]
        isel = asc.iselection(selstring)
        helix_selection.set_selected(isel, True)
        other_selection.set_selected(isel, False)
        ss_for_tors_selection.set_selected(isel, True)
        nonss_for_tors_selection.set_selected(isel, False)
    for sheet in self.ss_annotation.sheets:
        for ss_sels in sheet.as_atom_selections():
            selstring = main_chain_selection_prefix % ss_sels
            isel = asc.iselection(selstring)
            sheet_selection.set_selected(isel, True)
            other_selection.set_selected(isel, False)
            ss_for_tors_selection.set_selected(isel, True)
            nonss_for_tors_selection.set_selected(isel, False)

    isel = asc.iselection(
        "not name ca and not name n and not name o and not name c")
    other_selection.set_selected(isel, False)
    # An atom annotated as both helix and sheet is treated as helix only, so
    # that it receives exactly one reference coordinate restraint.
    helix_sheet_intersection = helix_selection & sheet_selection
    if helix_sheet_intersection.count(True) > 0:
        sheet_selection = sheet_selection & ~helix_sheet_intersection
    assert ((helix_selection | sheet_selection) & other_selection).count(True)==0

    grm = self.model.get_restraints_manager()
    ssm_log = null_out()
    if self.processed_params.verbose:
        ssm_log = self.log
    ss_params = secondary_structure.sec_str_master_phil.fetch().extract()
    ss_params.secondary_structure.protein.remove_outliers=False
    ss_manager = secondary_structure.manager(
        pdb_hierarchy=self.model.get_hierarchy(),
        geometry_restraints_manager=grm.geometry,
        sec_str_from_pdb_file=self.ss_annotation,
        params=ss_params.secondary_structure,
        mon_lib_srv=None,
        verbose=-1,
        log=ssm_log)
    grm.geometry.set_secondary_structure_restraints(
        ss_manager=ss_manager,
        hierarchy=self.model.get_hierarchy(),
        log=ssm_log)
    self.model.get_hierarchy().reset_i_seq_if_necessary()

    from mmtbx.geometry_restraints import reference
    if self.reference_map is None:
        # Without a map the main chain is tethered to its current positions.
        if self.processed_params.verbose:
            print("Adding reference coordinate restraints...", file=self.log)
        for selection, sigma in (
                (helix_selection,
                 self.processed_params.sigma_on_reference_helix),
                (sheet_selection,
                 self.processed_params.sigma_on_reference_sheet),
                (other_selection,
                 self.processed_params.sigma_on_reference_non_ss)):
            grm.geometry.append_reference_coordinate_restraints_in_place(
                reference.add_coordinate_restraints(
                    sites_cart=self.model.get_sites_cart().select(selection),
                    selection=selection,
                    sigma=sigma))

    # XXX Somewhere here we actually should check placed side-chains for
    # clashes because we used ones that were in original model and just moved
    # them to nearest allowed rotamer. The idealization may affect a lot
    # the orientation of side chain thus justifying changing rotamer on it
    # to avoid clashes.
    if self.processed_params.fix_rotamer_outliers:
        print("Fixing/checking rotamers...", file=self.log)
        # NOTE(review): unconditional dump into the current directory; looks
        # like a debugging leftover - confirm before removing.
        br_txt = self.model.model_as_pdb()
        with open("before_rotamers.pdb", 'w') as f:
            f.write(br_txt)
        # Backbone sampling is enabled only when a map is available.
        backbone_sample = self.reference_map is not None
        result = mmtbx.refinement.real_space.fit_residues.run(
            pdb_hierarchy=self.model.get_hierarchy(),
            crystal_symmetry=self.model.crystal_symmetry(),
            map_data=self.reference_map,
            rotamer_manager=mmtbx.idealized_aa_residues.rotamer_manager.load(
                rotamers="favored"),
            sin_cos_table=scitbx.math.sin_cos_table(n=10000),
            backbone_sample=backbone_sample,
            mon_lib_srv=self.model.get_mon_lib_srv(),
            log=self.log)
        self.model.set_sites_cart(
            sites_cart=result.pdb_hierarchy.atoms().extract_xyz())

    if self.processed_params.verbose:
        print("Adding chi torsion restraints...", file=self.log)
    # Only backbone.
    for selection, sigma in (
            (ss_for_tors_selection,
             self.processed_params.sigma_on_torsion_ss),
            (nonss_for_tors_selection,
             self.processed_params.sigma_on_torsion_nonss)):
        grm.geometry.add_chi_torsion_restraints_in_place(
            pdb_hierarchy=self.model.get_hierarchy(),
            sites_cart=self.model.get_sites_cart().select(selection),
            selection=selection,
            chi_angles_only=False,
            sigma=sigma)

    #
    # Check and correct for special positions
    #
    real_h = self.model.get_hierarchy()  # just a shortcut here...
    special_position_settings = crystal.special_position_settings(
        crystal_symmetry=self.model.crystal_symmetry())
    site_symmetry_table = special_position_settings.site_symmetry_table(
        sites_cart=self.model.get_sites_cart(),
        unconditional_general_position_flags=(
            self.model.get_atoms().extract_occ() != 1))
    spi = site_symmetry_table.special_position_indices()
    if spi.size() > 0:
        print("Moving atoms from special positions:", file=self.log)
        for spi_i in spi:
            # Atoms that were on special positions originally stay there.
            if spi_i not in self.original_spi:
                atom = real_h.atoms()[spi_i]
                old_xyz = atom.xyz
                new_coords = (
                    old_xyz[0]+0.2,
                    old_xyz[1]+0.2,
                    old_xyz[2]+0.2)
                print(" ", atom.id_str(), end=' ', file=self.log)
                print(tuple(old_xyz), "-->", new_coords, file=self.log)
                atom.set_xyz(new_coords)
    self.model.set_sites_cart_from_hierarchy()

    self.model_before_regularization = self.model.deep_copy()

    if self.processed_params.file_name_before_regularization is not None:
        grm.geometry.pair_proxies(sites_cart=self.model.get_sites_cart())
        grm.geometry.update_ramachandran_restraints_phi_psi_targets(
            hierarchy=self.model.get_hierarchy())
        print("Outputting model before regularization %s" % self.processed_params.file_name_before_regularization, file=self.log)
        m_txt = self.model.model_as_pdb()
        g_txt = self.model.restraints_as_geo()
        with open(self.processed_params.file_name_before_regularization, 'w') as f:
            f.write(m_txt)
        # NOTE(review): assumes the file name ends with a 4-character
        # extension such as ".pdb".
        geo_fname = self.processed_params.file_name_before_regularization[:-4]+'.geo'
        print("Outputting geo file for regularization %s" % geo_fname, file=self.log)
        with open(geo_fname, 'w') as f:
            f.write(g_txt)

    # Testing number of restraints.
    assert grm.geometry.get_n_den_proxies() == 0
    if self.reference_map is None:
        assert grm.geometry.get_n_reference_coordinate_proxies() == n_main_chain_atoms, "" +\
            "%d %d" % (grm.geometry.get_n_reference_coordinate_proxies(), n_main_chain_atoms)

    refinement_log = null_out()
    self.log.write(
        "Refining geometry of substituted secondary structure elements\n")
    self.log.write(
        " for %s macro_cycle(s).\n" % self.processed_params.n_macro)
    self.log.flush()
    if self.processed_params.verbose:
        refinement_log = self.log
    if self.reference_map is None:
        n_cycles = self.processed_params.n_macro
        if self.processed_params.n_macro == Auto:
            n_cycles=5
        minimize_wrapper_for_ramachandran(
            model=self.model,
            original_pdb_h=None,
            excl_string_selection="",
            log=refinement_log,
            number_of_cycles=n_cycles)
    else:
        # NOTE(review): n_macro is forwarded as is here, i.e. possibly Auto;
        # presumably minimize_wrapper_with_map resolves it - confirm.
        minimize_wrapper_with_map(
            model=self.model,
            target_map=self.reference_map,
            refine_ncs_operators=False,
            number_of_cycles=self.processed_params.n_macro,
            min_mode='simple_cycles',
            log=self.log)
    self.model.set_sites_cart_from_hierarchy()
    self.log.write(" Done\n")
    self.log.flush()

    grm.geometry.remove_reference_coordinate_restraints_in_place()
    grm.geometry.remove_chi_torsion_restraints_in_place(nonss_for_tors_selection)
    return grm.geometry.get_chi_torsion_proxies()
def __init__(self,
             pdb_hierarchy,
             params=None,
             secondary_structure_annotation=None,
             reference_map=None,
             crystal_symmetry=None,
             grm=None,
             rama_manager=None,
             rotamer_manager=None,
             log=null_out(),
             verbose=False):
    """
    Run loop idealization on pdb_hierarchy; all work happens here.

    A deep copy of pdb_hierarchy is kept in self.resulting_pdb_h and is the
    object being modified; the input is kept as self.original_pdb_h.
    While Ramachandran outliers remain, self.params.enabled is true and
    fewer than self.params.number_of_ccd_trials trials were made, every chain
    is passed to self.idealize_chain() and, if self.params.minimize_whole is
    set, the whole model is minimized (against reference_map when usable).

    reference_map and crystal_symmetry must be given together; if only one
    of them is supplied the map is ignored.

    Raises Sorry for multi-model hierarchies.

    NOTE(review): the source had no line structure, so the nesting of the
    statements after the chain loop (inside the while loop) was
    reconstructed - confirm against the original file.
    """
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation = secondary_structure_annotation
    # Selection cache of the *input* hierarchy; used below to select chains
    # from self.resulting_pdb_h.
    # NOTE(review): assumes atom order stays identical to the input - confirm.
    asc = pdb_hierarchy.atom_selection_cache()
    self.xrs = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)
    self.reference_map = reference_map
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.grm = grm
    self.r = rama_manager
    if self.r is None:
        self.r = rama_eval()
    self.rotamer_manager = rotamer_manager
    if self.rotamer_manager is None:
        self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    # Exactly one of (map, symmetry) given -> the map cannot be used.
    n_inputs = [reference_map, crystal_symmetry].count(None)
    if not (n_inputs == 0 or n_inputs == 2):
        print >> log, "Need to have both map and symmetry info. Not using map."
        self.reference_map = None

    # Outlier percentage computed from the line count of the outlier listing.
    berkeley_count = utils.list_rama_outliers_h(
        self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100

    # self.berkeley_p_before_minimization_rama_outliers = None
    self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers
    self.ref_exclusion_selection = ""
    number_of_ccd_trials = 0
    # print "logic expr outcome:", (number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001)
    # print number_of_ccd_trials < 10
    # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers
    if self.berkeley_p_before_minimization_rama_outliers <= 0.001:
        print >> self.log, "No ramachandran outliers, skipping CCD step."
    if not self.params.enabled:
        print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
    while (number_of_ccd_trials < self.params.number_of_ccd_trials
           and self.berkeley_p_after_minimiaztion_rama_outliers > 0.001
           and self.params.enabled):
        # NOTE(review): this goes to stdout, unlike every other message here
        # which goes to self.log - possibly a debugging leftover.
        print "CCD try number, outliers:", number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
        number_of_ccd_trials += 1
        processed_chain_ids = []
        for chain in self.resulting_pdb_h.only_model().chains():
            print >> self.log, "Idealizing chain %s" % chain.id
            # Only the first chain object with a given id is processed.
            if chain.id not in processed_chain_ids:
                processed_chain_ids.append(chain.id)
            else:
                continue
            # Main-chain selection of this chain; residues reported by
            # idealize_chain() are excluded from it below.
            selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
            sel = asc.selection("chain %s" % chain.id)
            chain_h = self.resulting_pdb_h.select(sel)
            m = chain_h.only_model()
            i = 0
            cutted_chain_h = None
            for c in m.chains():
                if i == 0:
                    cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(
                        c)
                else:
                    print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                    print >> self.log, " Removing chain %s with %d residues" % (
                        c.id, len(c.residues()))
                    m.remove_chain(c)
                i += 1
            exclusions, ch_h = self.idealize_chain(
                hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
            if ch_h is not None:
                # Copy the idealized coordinates back into the working chain.
                set_xyz_smart(
                    # dest_h=self.resulting_pdb_h,
                    dest_h=chain,
                    source_h=ch_h)
                for resnum in exclusions:
                    selection += " and not resseq %s" % resnum
            self.ref_exclusion_selection += "(%s) or " % selection
        #
        # dumping and reloading hierarchy to do proper rounding of coordinates
        self.resulting_pdb_h = iotbx.pdb.input(
            source_info=None,
            lines=self.resulting_pdb_h.as_pdb_string(
                )).construct_hierarchy()
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        self.berkeley_p_before_minimization_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        # Drop the tail of the last "(...) or " chunk appended above.
        # NOTE(review): if another CCD trial follows, the next "+=" continues
        # right after the stripped text with no " or " joiner - confirm this
        # is intended.
        if len(self.ref_exclusion_selection) > 0:
            self.ref_exclusion_selection = self.ref_exclusion_selection[: -3]
        # self.resulting_pdb_h.write_pdb_file(file_name="%s_before_minimization.pdb" % self.params.output_prefix)
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_before_minimization_rama_outliers = ram.out_percent

        # Cross-check the two outlier counts.
        duke_count = ram.get_outliers_count_and_fraction()[0]
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count

        if self.params.minimize_whole:
            print >> self.log, "minimizing whole thing..."
            print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
            # print >> sel
            if self.reference_map is None:
                minimize_wrapper_for_ramachandran(
                    hierarchy=self.resulting_pdb_h,
                    xrs=self.xrs,
                    original_pdb_h=self.original_pdb_h,
                    excl_string_selection=self.ref_exclusion_selection,
                    grm=self.grm,
                    log=None,
                    ss_annotation=self.secondary_structure_annotation)
            else:
                mwwm = minimize_wrapper_with_map(
                    pdb_h=self.resulting_pdb_h,
                    xrs=self.xrs,
                    target_map=self.reference_map,
                    grm=self.grm,
                    ss_annotation=self.secondary_structure_annotation,
                    log=self.log)
        # self.resulting_pdb_h.write_pdb_file(file_name="%s_all_minized.pdb" % self.params.output_prefix)
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_after_minimiaztion_rama_outliers = ram.out_percent
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        duke_count = ram.get_outliers_count_and_fraction()[0]
        # This value drives the while-loop condition above.
        self.berkeley_p_after_minimiaztion_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
        else:
            print >> self.log, "Number of Rama outliers after min:", berkeley_count
def fix_rama_outlier(self, pdb_hierarchy, out_res_num, prefix="",
    minimize=True, ss_annotation=None):
  """Try to fix one Ramachandran outlier by rebuilding the loop around it.

  A fixed list of search variants (ccd_radius, change_all, change_radius) is
  filtered by self.params.variant_search_level.  For every variant the
  moving fragment around out_res_num is extracted, a set of starting
  conformations is generated and each one is closed with ccd_cpp.  The
  closed backbone is copied into a copy of pdb_hierarchy, side chains are
  placed, and the first variant accepted by self.ccd_solution_is_ok() is
  (optionally) minimized and returned.

  If no variant is accepted and self.params.force_rama_fixes is set, the
  variant with the smallest main-chain RMSD among those whose anchor RMSD
  is not above 0.1 is (optionally) minimized and returned instead.

  Parameters:
    pdb_hierarchy: hierarchy to work on; it is not modified, all changes
        are made on deep copies.
    out_res_num: residue number of the outlier to fix.
    prefix: currently unused (it only served debugging file dumps).
    minimize: run geometry minimization on the selected variant.
    ss_annotation: forwarded to get_fixed_moving_parts().

  Returns:
    The modified hierarchy when a variant was selected, otherwise a deep
    copy of the input hierarchy with reset atom i_seqs.
  """
  original_pdb_h = pdb_hierarchy.deep_copy()
  original_pdb_h.reset_atom_i_seqs()
  # The value itself is not used any more.  NOTE(review): the lookup
  # presumably doubles as a single-model/single-chain sanity check, so it is
  # kept -- confirm before removing.
  chain_id = original_pdb_h.only_model().only_chain().id
  # Records are (result, placing_range) with
  # result = (hierarchy, mc_rmsd, anchor_rmsd, map_target, n_iter).
  # The range is stored per variant because it depends on ccd_radius.
  all_results = []
  # ((ccd_radius, change_all, change_radius), minimal search level)
  variants_searches = [
      ((1, False, 0), 1),
      ((2, False, 0), 1),
      ((3, False, 0), 2),
      ((2, True, 1), 1),
      ((3, True, 1), 2),
      ((3, True, 2), 3),
  ]
  decided_variants = [
      variant for variant, level in variants_searches
      if level <= self.params.variant_search_level]
  for ccd_radius, change_all, change_radius in decided_variants:
    print >> self.log, \
        " Starting optimization with radius, change_all, change_radius:", \
        ccd_radius, change_all, change_radius
    self.log.flush()
    (moving_h, moving_ref_atoms_iseqs, fixed_ref_atoms, m_selection,
        contains_ss_element) = get_fixed_moving_parts(
            pdb_hierarchy=pdb_hierarchy,
            out_res_num=out_res_num,
            n_following=ccd_radius,
            n_previous=ccd_radius,
            ss_annotation=ss_annotation)
    if change_all:
      moving_h_set = starting_conformations.get_all_starting_conformations(
          moving_h,
          change_radius,
          cutoff=self.params.variant_number_cutoff)
    else:
      moving_h_set = starting_conformations.get_starting_conformations(
          moving_h,
          cutoff=self.params.variant_number_cutoff)
    if len(moving_h_set) == 0:
      # outlier was fixed before somehow...
      # or there's a bug in get_starting_conformations
      return original_pdb_h
    for i, h in enumerate(moving_h_set):
      fixed_ref_atoms_coors = [x.xyz for x in fixed_ref_atoms]
      ccd_obj = ccd_cpp(fixed_ref_atoms_coors, h, moving_ref_atoms_iseqs)
      ccd_obj.run()
      resulting_rmsd = ccd_obj.resulting_rmsd
      n_iter = ccd_obj.n_iter
      map_target = 0
      if self.reference_map is not None:
        map_target = maptbx.real_space_target_simple(
            unit_cell=self.xrs.crystal_symmetry().unit_cell(),
            density_map=self.reference_map,
            sites_cart=h.atoms().extract_xyz())
      mc_rmsd = get_main_chain_rmsd_range(moving_h, h, all_atoms=True)
      if self.verbose:
        print >> self.log, \
            "Resulting anchor and backbone RMSDs, mapcc, n_iter for model %d:" % i,
        print >> self.log, \
            resulting_rmsd, ",", mc_rmsd, ",", map_target, ",", n_iter
        self.log.flush()
      # Copy the closed fragment's coordinates into a full copy of the input:
      # m_selection[k] is the index in the full hierarchy of fragment atom k.
      moved_with_side_chains_h = pdb_hierarchy.deep_copy()
      dest_atoms = moved_with_side_chains_h.atoms()
      source_atoms = h.atoms()
      for i_source, i_dest in enumerate(m_selection):
        dest_atoms[i_dest].set_xyz(source_atoms[i_source].xyz)
      placing_range = get_res_nums_around(moved_with_side_chains_h,
          center_resnum=out_res_num,
          n_following=ccd_radius,
          n_previous=ccd_radius,
          include_intermediate=True)
      place_side_chains(moved_with_side_chains_h, original_pdb_h,
          self.rotamer_manager, placing_range)
      all_results.append((
          (moved_with_side_chains_h.deep_copy(), mc_rmsd, resulting_rmsd,
              map_target, n_iter),
          placing_range))
      if self.ccd_solution_is_ok(
          anchor_rmsd=resulting_rmsd,
          mc_rmsd=mc_rmsd,
          ccd_radius=ccd_radius,
          change_all_angles=change_all,
          change_radius=change_radius,
          contains_ss_element=contains_ss_element):
        # Goes to the log like every other message of this method.
        print >> self.log, \
            "Choosen result (mc_rmsd, anchor_rmsd, map_target, n_iter):", \
            mc_rmsd, resulting_rmsd, map_target, n_iter
        self.log.flush()
        if minimize:
          self._minimize_fixed_variant(
              moved_with_side_chains_h, original_pdb_h, placing_range)
        return moved_with_side_chains_h

  # No variant was accepted: order what was tried by main-chain RMSD.
  all_results.sort(key=lambda record: record[0][1])
  if self.verbose:
    print >> self.log, "ALL RESULTS:"
    for result, unused_range in all_results:
      print >> self.log, result[1:],
      # The per-variant file dump for anchor RMSD < 0.4 is disabled, so only
      # the remaining variants get a message (and a line break) here.
      if not (result[2] < 0.4):
        print >> self.log, " no output"
  if self.params.force_rama_fixes:
    # find and apply the best variant from all_results. This would be the one
    # with the smallest rmsd given satisfactory closure
    print >> self.log, "Applying the best found variant:",
    best = None
    for record in all_results:
      if not (record[0][2] > 0.1):
        best = record
        break
    if best is not None:
      best_result, best_range = best
      best_h = best_result[0]
      print >> self.log, best_result[1:]
      if minimize:
        self._minimize_fixed_variant(best_h, original_pdb_h, best_range)
      # Return the variant that was actually selected, not the last one tried.
      return best_h
    print >> self.log, " NOT FOUND!"
    for result, unused_range in all_results:
      print >> self.log, result[1:]
  else:
    print >> self.log, "Epic FAIL: failed to fix rama outlier"
    print >> self.log, " Options were: (mc_rmsd, resultign_rmsd, n_iter)"
    for result, unused_range in all_results:
      print >> self.log, result[1:]
  return original_pdb_h

def _minimize_fixed_variant(self, hierarchy, original_pdb_h, placing_range):
  """Minimize a rebuilt variant in place and log its final main-chain RMSD.

  Uses the map-based minimizer when self.reference_map is set and the
  Ramachandran-restrained one otherwise.

  Parameters:
    hierarchy: rebuilt hierarchy to minimize (modified in place).
    original_pdb_h: reference hierarchy the RMSD is measured against.
    placing_range: residue range passed to get_main_chain_rmsd_range().
  """
  print >> self.log, "minimizing..."
  # NOTE(review): self.xrs and self.grm are the instance-wide objects;
  # confirm they correspond to `hierarchy` when it is only part of the model.
  if self.reference_map is None:
    minimize_wrapper_for_ramachandran(
        hierarchy=hierarchy,
        xrs=self.xrs,
        original_pdb_h=original_pdb_h,
        log=self.log,
        grm=self.grm,
        ss_annotation=self.secondary_structure_annotation)
  else:
    minimize_wrapper_with_map(
        pdb_h=hierarchy,
        xrs=self.xrs,
        target_map=self.reference_map,
        grm=self.grm,
        ss_annotation=self.secondary_structure_annotation,
        log=self.log)
  final_rmsd = get_main_chain_rmsd_range(
      hierarchy, original_pdb_h, placing_range)
  print >> self.log, "FINAL RMSD after minimization:", final_rmsd