def __init__(self, pdb_hierarchy, params=None, log=null_out(), verbose=True):
  """Idealize every chain of a single-model hierarchy, then optionally minimize.

  Each chain of ``pdb_hierarchy`` is selected by chain id and handed to
  ``self.idealize_chain``.  When that call returns a hierarchy, its
  coordinates are copied into ``self.resulting_pdb_h`` (a deep copy of the
  input) with ``set_xyz_smart``.  A backbone selection string per chain is
  accumulated in ``self.ref_exclusion_selection``.

  Parameters:
    pdb_hierarchy: input hierarchy; kept by reference as
      ``self.original_pdb_h``.  Must contain exactly one model.
    params: passed through ``self.process_params``; the result is read for
      ``output_prefix`` and ``minimize_whole``.
    log: stream that receives progress messages.
    verbose: stored as ``self.verbose``; not otherwise used in this method.

  Raises:
    Sorry: if ``pdb_hierarchy`` has more than one model.

  Side effects: writes "<output_prefix>_before_minization.pdb".

  NOTE(review): statement nesting in this method was reconstructed; confirm
  against version control, in particular which statements are inside
  ``if ch_h is not None`` and ``if self.params.minimize_whole``.
  """
  if len(pdb_hierarchy.models()) > 1:
    raise Sorry("Multi-model files are not supported")
  self.original_pdb_h = pdb_hierarchy
  xrs = pdb_hierarchy.extract_xray_structure()
  asc = pdb_hierarchy.atom_selection_cache()
  self.resulting_pdb_h = pdb_hierarchy.deep_copy()
  self.params = self.process_params(params)
  self.log = log
  self.verbose = verbose
  self.r = ramachandran_eval.RamachandranEval()
  # Ramachandran statistics of the untouched input model.
  ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
  self.p_initial_rama_outliers = ram.out_percent
  self.p_before_minimization_rama_outliers = None
  self.p_after_minimiaztion_rama_outliers = None
  self.ref_exclusion_selection = ""
  for chain in pdb_hierarchy.only_model().chains():
    print >>self.log, "Idealizing chain %s" % chain.id
    selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
    sel = asc.selection("chain %s" % chain.id)
    chain_h = self.original_pdb_h.select(sel)
    m = chain_h.only_model()
    i = 0
    cutted_chain_h = None
    # Selecting by chain id can return several chain objects sharing that id;
    # only the first one is processed, the others are removed from the copy.
    for c in m.chains():
      if i == 0:
        cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
      else:
        print >>self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
        print >>self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
        m.remove_chain(c)
      i += 1
    exclusions, ch_h = self.idealize_chain(hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
    if ch_h is not None:
      set_xyz_smart(self.resulting_pdb_h, ch_h)
      # Residues reported by idealize_chain are carved out of this chain's
      # backbone selection.
      for resnum in exclusions:
        selection += " and not resseq %s" % resnum
    self.ref_exclusion_selection += "(%s) or " % selection
  if len(self.ref_exclusion_selection) > 0:
    # Drops the trailing "or " left by the accumulation above.
    self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
  self.resulting_pdb_h.write_pdb_file(file_name="%s_before_minization.pdb" % self.params.output_prefix)
  ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
  self.p_before_minimization_rama_outliers = ram.out_percent
  if self.params.minimize_whole:
    print >>self.log, "minimizing whole thing..."
    print >>self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
    minimize_hierarchy(self.resulting_pdb_h, xrs, self.original_pdb_h, self.ref_exclusion_selection, log=None)
    # self.resulting_pdb_h.write_pdb_file(file_name="%s_all_minized.pdb" % self.params.output_prefix)
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_after_minimiaztion_rama_outliers = ram.out_percent
def __init__(self, pdb_hierarchy, params=None, secondary_structure_annotation=None, reference_map=None, crystal_symmetry=None, grm=None, rama_manager=None, rotamer_manager=None, log=null_out(), verbose=False, tried_rama_angles={}, tried_final_rama_angles={}, n_run=0): if len(pdb_hierarchy.models()) > 1: raise Sorry("Multi-model files are not supported") self.original_pdb_h = pdb_hierarchy self.secondary_structure_annotation=secondary_structure_annotation asc = pdb_hierarchy.atom_selection_cache() self.xrs = pdb_hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry) self.reference_map = reference_map self.resulting_pdb_h = pdb_hierarchy.deep_copy() self.resulting_pdb_h.reset_atom_i_seqs() self.params = self.process_params(params) self.log = log self.verbose = verbose self.grm = grm self.r = rama_manager self.ideal_res_dict = idealized_aa.residue_dict() self.n_run = n_run if self.r is None: self.r = rama_eval() self.rotamer_manager = rotamer_manager if self.rotamer_manager is None: self.rotamer_manager = RotamerEval() ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy) self.p_initial_rama_outliers = ram.out_percent self.p_before_minimization_rama_outliers = None self.p_after_minimiaztion_rama_outliers = None n_inputs = [reference_map, crystal_symmetry].count(None) if not (n_inputs == 0 or n_inputs == 2): print >> log, "Need to have both map and symmetry info. Not using map." self.reference_map = None # here we are recording what CCD solutions were used to fix particular # outliers to not use the same in the next CCD try. # Nested dict. First level: # key: chain id, value: dict # key: resid (string), value: list of tried variants. 
self.tried_rama_angles = tried_rama_angles self.tried_final_rama_angles = tried_final_rama_angles berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n") self.berkeley_p_before_minimization_rama_outliers = \ berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100 n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h) self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers self.ref_exclusion_selection = "" self.number_of_ccd_trials = 0 # print "logic expr outcome:", (self.number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001) # print self.number_of_ccd_trials < 10 # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers if (self.berkeley_p_before_minimization_rama_outliers <= 0.001 and (n_bad_omegas<1 and self.params.make_all_trans)): print >> self.log, "No ramachandran outliers, skipping CCD step." print "n_bad_omegas", n_bad_omegas print "self.params.make_all_trans",self.params.make_all_trans if not self.params.enabled: print >> self.log, "Loop idealization is not enabled, use 'enabled=True'." 
while (self.number_of_ccd_trials < self.params.number_of_ccd_trials and (self.berkeley_p_after_minimiaztion_rama_outliers > 0.001 or (n_bad_omegas>=1 and self.params.make_all_trans)) and self.params.enabled): print >> self.log, "CCD try number, outliers:", self.number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers processed_chain_ids = [] for chain in self.resulting_pdb_h.only_model().chains(): if chain.id not in self.tried_rama_angles.keys(): self.tried_rama_angles[chain.id] = {} if chain.id not in self.tried_final_rama_angles.keys(): self.tried_final_rama_angles[chain.id] = {} print >> self.log, "Idealizing chain %s" % chain.id if chain.id not in processed_chain_ids: processed_chain_ids.append(chain.id) else: continue selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id sel = asc.selection("chain %s" % chain.id) chain_h = self.resulting_pdb_h.select(sel) m = chain_h.only_model() i = 0 cutted_chain_h = None for c in m.chains(): if i == 0: cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c) else: print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed." 
print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues())) m.remove_chain(c) i += 1 exclusions, ch_h = self.idealize_chain( hierarchy=(cutted_chain_h if cutted_chain_h else chain_h), tried_rama_angles_for_chain=self.tried_rama_angles[chain.id], tried_final_rama_angles_for_chain=self.tried_final_rama_angles[chain.id]) if ch_h is not None: set_xyz_smart( # dest_h=self.resulting_pdb_h, dest_h=chain, source_h=ch_h) for resnum in exclusions: selection += " and not resseq %s" % resnum self.ref_exclusion_selection += "(%s) or " % selection print "self.tried_rama_angles", self.tried_rama_angles print "self.tried_final_rama_angles", self.tried_final_rama_angles # # dumping and reloading hierarchy to do proper rounding of coordinates self.resulting_pdb_h = iotbx.pdb.input( source_info=None, lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy() berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n") self.berkeley_p_before_minimization_rama_outliers = \ berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100 if len(self.ref_exclusion_selection) > 0: self.ref_exclusion_selection = self.ref_exclusion_selection[:-3] ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h) self.p_before_minimization_rama_outliers = ram.out_percent duke_count = ram.get_outliers_count_and_fraction()[0] if berkeley_count != duke_count: print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count self.resulting_pdb_h.write_pdb_file(file_name="%d%s_discrepancy.pdb" % (self.number_of_ccd_trials, self.params.output_prefix)) if self.params.debug: self.resulting_pdb_h.write_pdb_file( file_name="%d%s_all_not_minized.pdb" % (self.number_of_ccd_trials, self.params.output_prefix)) if self.params.minimize_whole: print >> self.log, "minimizing whole chain..." 
print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection # print >> sel # XXX but first let's check and fix rotamers... print >> self.log, "Fixing/checking rotamers in loop idealization..." excl_sel = self.ref_exclusion_selection if len(excl_sel) == 0: excl_sel = None non_outliers_for_check = asc.selection("(%s)" % self.ref_exclusion_selection) pre_result_h = mmtbx.utils.fix_rotamer_outliers( pdb_hierarchy=self.resulting_pdb_h, grm=self.grm.geometry, xrs=self.xrs, map_data=self.reference_map, radius=5, mon_lib_srv=None, rotamer_manager=self.rotamer_manager, backrub_range=None, # don't sample backrub at this point non_outliers_to_check=non_outliers_for_check, # bool selection asc=asc, verbose=True, log=self.log) if self.reference_map is None: minimize_wrapper_for_ramachandran( hierarchy=self.resulting_pdb_h, xrs=self.xrs, original_pdb_h=self.original_pdb_h, excl_string_selection=self.ref_exclusion_selection, grm=self.grm, log=None, ss_annotation=self.secondary_structure_annotation) else: mwwm = minimize_wrapper_with_map( pdb_h=self.resulting_pdb_h, xrs=self.xrs, target_map=self.reference_map, grm=self.grm, ss_annotation=self.secondary_structure_annotation, number_of_cycles=Auto, log=self.log) if self.params.debug: self.resulting_pdb_h.write_pdb_file( file_name="%d%s_all_minized.pdb" % (self.number_of_ccd_trials, self.params.output_prefix)) ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h) self.p_after_minimiaztion_rama_outliers = ram.out_percent berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n") duke_count = ram.get_outliers_count_and_fraction()[0] n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h) self.berkeley_p_after_minimiaztion_rama_outliers = \ berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100 if berkeley_count != duke_count: print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count else: print >> self.log, "Number of Rama outliers 
after min:", berkeley_count print >> self.log, "Number of bad omegas:", n_bad_omegas self.number_of_ccd_trials += 1
def fix_rama_outlier(self, pdb_hierarchy, out_res_num, prefix="", minimize=True):
  """Try to fix one Ramachandran outlier with CCD; return the resulting model.

  Three settings of (ccd_radius, change_all, change_radius) are tried in
  order.  For each, candidate starting conformations of the moving fragment
  are generated, CCD is run on every candidate, side chains are placed, and
  the first candidate accepted by ``self.ccd_solution_is_ok`` is returned.

  Parameters:
    pdb_hierarchy: hierarchy containing the outlier; it is deep-copied, not
      modified, by this method.
    out_res_num: residue number of the outlier, forwarded to
      ``get_fixed_moving_parts`` and ``get_res_nums_around``.
    prefix: file-name prefix for the PDB files written when ``minimize`` is
      true.
    minimize: when true, an accepted solution is minimized and dumped to
      "<prefix>_result_before_min_<i>.pdb" and
      "<prefix>_result_minimized_<i>.pdb".

  Returns:
    A hierarchy with the accepted solution, or a deep copy of the input when
    no starting conformation exists for some setting or no candidate is
    accepted.

  NOTE(review): statement nesting was reconstructed; confirm against version
  control.
  """
  original_pdb_h = pdb_hierarchy.deep_copy()
  # Built once; the same evaluator serves every candidate of every setting.
  rotamer_manager = RotamerEval()
  all_results = []
  for ccd_radius, change_all, change_radius in [
      (1, False, 0),
      (2, False, 0),
      # (3, False, 0),
      (2, True, 1),
      # (3, True, 1),
      ]:
    print >>self.log, " Starting optimization with radius, change_all:", ccd_radius, change_all
    moving_h, moving_ref_atoms_iseqs, fixed_ref_atoms = get_fixed_moving_parts(
        pdb_hierarchy=pdb_hierarchy,
        out_res_num=out_res_num,
        n_following=ccd_radius,
        n_previous=ccd_radius)
    if change_all:
      moving_h_set = starting_conformations.get_all_starting_conformations(
          moving_h, change_radius, log=self.log)
    else:
      moving_h_set = starting_conformations.get_starting_conformations(
          moving_h, log=self.log)
    if len(moving_h_set) == 0:
      # outlier was fixed before somehow...
      return original_pdb_h
    for i, h in enumerate(moving_h_set):
      ccd_obj = ccd_python(fixed_ref_atoms, h, moving_ref_atoms_iseqs)
      ccd_obj.run()
      resulting_rmsd = ccd_obj.resulting_rmsd
      n_iter = ccd_obj.n_iter
      mc_rmsd = get_main_chain_rmsd_range(moving_h, h, all_atoms=True)
      print >>self.log, "Resulting anchor and backbone RMSDs, n_iter for model %d:" % i,
      print >>self.log, resulting_rmsd, ",", mc_rmsd, ",", n_iter
      # Kept only for the failure report at the end.
      all_results.append((h.deep_copy(), mc_rmsd, resulting_rmsd, n_iter))
      #
      # setting new coordinates
      #
      moved_with_side_chains_h = pdb_hierarchy.deep_copy()
      set_xyz_smart(moved_with_side_chains_h, h)
      #
      # placing side-chains
      #
      placing_range = get_res_nums_around(
          moved_with_side_chains_h,
          center_resnum=out_res_num,
          n_following=ccd_radius,
          n_previous=ccd_radius,
          include_intermediate=True)
      place_side_chains(moved_with_side_chains_h, original_pdb_h,
          rotamer_manager, placing_range)
      #
      # finalizing with geometry_minimization
      #
      # !!! This is the condition of acceptance of transformation!
      if self.ccd_solution_is_ok(
          anchor_rmsd=resulting_rmsd,
          mc_rmsd=mc_rmsd,
          ccd_radius=ccd_radius,
          change_all_angles=change_all,
          change_radius=change_radius):
        if minimize:
          print >>self.log, "minimizing..."
          moved_with_side_chains_h.write_pdb_file(file_name="%s_result_before_min_%d.pdb" % (prefix, i))
          # NOTE(review): `xrs` is not defined anywhere in this method, and
          # self.log is passed positionally in the slot where the constructor
          # passes the exclusion selection string.  This call is left
          # untouched pending confirmation of minimize_hierarchy's signature.
          minimize_hierarchy(moved_with_side_chains_h, xrs, original_pdb_h, self.log)
          moved_with_side_chains_h.write_pdb_file(file_name="%s_result_minimized_%d.pdb" % (prefix, i))
        final_rmsd = get_main_chain_rmsd_range(moved_with_side_chains_h, original_pdb_h, placing_range)
        print >>self.log, "FINAL RMSD after minimization:", final_rmsd
        return moved_with_side_chains_h

  print >>self.log, "Epic FAIL: failed to fix rama outlier"
  # Report every rejected candidate, best backbone RMSD first.
  all_results.sort(key=lambda tup: tup[1])
  print >>self.log, " Options were: (mc_rmsd, resultign_rmsd, n_iter)"
  for i in all_results:
    print >>self.log, i[1:]
  return original_pdb_h
def run(self):
  """Run the whole idealization pipeline and record statistics per stage.

  Stages, in order: NCS detection and selection of the working hierarchy;
  secondary-structure (SS) annotation filtering; restraints setup via
  ``self.get_grm``; SS substitution (or a plain minimization when it is
  skipped); loop idealization; optional rotamer fixing; propagation of the
  result to NCS copies; final minimization of the whole model.  Intermediate
  models are written through ``self.shift_and_write_result``.

  Statistics are stored in ``self.after_ss_idealization``,
  ``self.after_loop_idealization``, ``self.after_rotamer_fixing`` and
  ``self.final_model_statistics``; wall time goes to ``self.time_for_run``.

  NOTE(review): statement nesting was reconstructed; confirm against version
  control.
  """
  t_0 = time()
  ncs_obj = iotbx.ncs.input(
      hierarchy=self.whole_pdb_h,
      chain_max_rmsd=4.0,
      chain_similarity_threshold=0.99,
      residue_match_radius=999.0)
  print >> self.log, "Found NCS groups:"
  ncs_obj.show(format='phil', log=self.log)
  ncs_restr_group_list = ncs_obj.get_ncs_restraints_group_list(
      raise_sorry=False)
  self.using_ncs = False
  # NOTE(review): total_ncs_selected_atoms is never read in this method.
  total_ncs_selected_atoms = 0
  master_sel = flex.size_t([])
  filtered_ncs_restr_group_list = self.filter_ncs_restraints_group_list(
      self.whole_pdb_h, ncs_restr_group_list)
  if len(filtered_ncs_restr_group_list) > 0:
    self.using_ncs = True
    # Start with every atom selected and switch off all NCS copies, leaving
    # the atoms that are not part of any copy.
    master_sel = flex.bool(self.whole_pdb_h.atoms_size(), True)
    for ncs_gr in filtered_ncs_restr_group_list:
      for copy in ncs_gr.copies:
        master_sel.set_selected(copy.iselection, False)
    self.master_pdb_h = self.whole_pdb_h.select(master_sel)
    self.master_sel=master_sel
    self.master_pdb_h.reset_atom_i_seqs()

  # The working hierarchy is the NCS master part when NCS is used, otherwise
  # the whole model.
  if self.using_ncs:
    self.master_pdb_h.write_pdb_file("%s_master_h.pdb" % self.params.output_prefix)
    self.working_pdb_h = self.master_pdb_h
  else:
    self.working_pdb_h = self.whole_pdb_h
  self.working_pdb_h.reset_atom_i_seqs()

  self.ann = ioss.annotation.from_phil(
      phil_helices=self.params.secondary_structure.protein.helix,
      phil_sheets=self.params.secondary_structure.protein.sheet,
      pdb_hierarchy=self.whole_pdb_h)

  self.working_xrs = self.working_pdb_h.extract_xray_structure(crystal_symmetry=self.cs)
  if self.using_ncs:
    self.whole_xrs = self.whole_pdb_h.extract_xray_structure(crystal_symmetry=self.cs)
  else:
    self.whole_xrs = self.working_xrs

  if self.params.use_map_for_reference:
    # self.prepare_reference_map(xrs=self.whole_xrs, pdb_h=self.whole_pdb_h)
    # self.prepare_reference_map_2(xrs=self.whole_xrs, pdb_h=self.whole_pdb_h)
    self.prepare_reference_map_3(xrs=self.whole_xrs, pdb_h=self.whole_pdb_h)
  # STOP()
  # Fall back to the annotation carried by the input file when the
  # parameters define no helices or sheets.
  if self.ann.get_n_helices() + self.ann.get_n_sheets() == 0:
    self.ann = self.pdb_input.extract_secondary_structure()
  self.original_ann = None
  self.filtered_whole_ann = None
  if self.ann is not None:
    self.original_ann = self.ann.deep_copy()
    print >> self.log, "Original SS annotation"
    print >> self.log, self.original_ann.as_pdb_str()
    self.ann.remove_short_annotations()
    # Two filtered copies are kept: self.ann matches the working hierarchy,
    # self.filtered_whole_ann matches the whole model.
    self.filtered_whole_ann = self.ann.deep_copy()
    self.ann.remove_empty_annotations(
        hierarchy=self.working_pdb_h)
    self.filtered_whole_ann.remove_empty_annotations(
        hierarchy=self.whole_pdb_h)
    # self.ann.concatenate_consecutive_helices()
    self.ann.split_helices_with_prolines(
        hierarchy=self.working_pdb_h,
        asc=None)
    self.filtered_whole_ann.split_helices_with_prolines(
        hierarchy=self.whole_pdb_h,
        asc=None)
    # print >> self.log, "Splitted SS annotation"
    # print >> self.log, ann.as_pdb_str()
    print >> self.log, "Filtered SS annotation"
    print >> self.log, self.ann.as_pdb_str()

  # getting grm with SS restraints
  self.get_grm()

  if (self.ann is None or
      self.ann.get_n_helices() + self.ann.get_n_sheets() == 0 or
      not self.params.ss_idealization.enabled):
    print >> self.log, "No secondary structure annotations found or SS idealization is disabled."
    print >> self.log, "Secondary structure substitution step will be skipped"
    self.log.flush()
    # here we want to do geometry minimization anyway!
    negate_selection = None
    if self.reference_map is None:
      # Without a map the selection passed to minimize covers everything
      # except the Ramachandran outliers.
      outlier_selection_txt = mmtbx.building.loop_closure.utils. \
        rama_outliers_selection(self.working_pdb_h, self.rama_manager, 1)
      print >> self.log, "outlier_selection_txt", outlier_selection_txt
      negate_selection = "all"
      if outlier_selection_txt != "" and outlier_selection_txt is not None:
        negate_selection = "not (%s)" % outlier_selection_txt
    self.minimize(
        hierarchy=self.whole_pdb_h,
        xrs=self.whole_xrs,
        original_pdb_h=self.whole_pdb_h,
        grm=self.whole_grm,
        ncs_restraints_group_list=filtered_ncs_restr_group_list,
        excl_string_selection=negate_selection,
        ss_annotation=self.ann,
        reference_map=self.reference_map)
    # self.original_boxed_hierarchy.write_pdb_file(file_name="original_boxed_h_1.pdb")
  else:
    self.params.ss_idealization.file_name_before_regularization = \
        "%s_ss_before_reg.pdb" % self.params.output_prefix
    ssb.substitute_ss(
        real_h=self.working_pdb_h,
        xray_structure=self.working_xrs,
        ss_annotation=self.ann,
        params=self.params.ss_idealization,
        grm=self.working_grm,
        fix_rotamer_outliers=True,
        cif_objects=self.cif_objects,
        verbose=True,
        reference_map=self.reference_map,
        rotamer_manager=self.rotamer_manager,
        log=self.log)
    self.log.flush()

  # Statistics are computed on a hierarchy re-read from the PDB string.
  self.after_ss_idealization = geometry_no_grm(
      pdb_hierarchy=iotbx.pdb.input(
        source_info=None,
        lines=self.working_pdb_h.as_pdb_string()).construct_hierarchy(),
      molprobity_scores=True)

  # Write resulting pdb file.
  self.shift_and_write_result(
      hierarchy=self.working_pdb_h,
      fname_suffix="ss_ideal",
      grm=self.working_grm)
  # STOP()
  # Loop idealization minimizes the whole working model only when NCS is
  # not in use.
  self.params.loop_idealization.minimize_whole = not self.using_ncs
  # self.params.loop_idealization.enabled = False
  # self.params.loop_idealization.variant_search_level = 0
  loop_ideal = loop_idealization(
      pdb_hierarchy=self.working_pdb_h,
      params=self.params.loop_idealization,
      secondary_structure_annotation=self.ann,
      reference_map=self.reference_map,
      crystal_symmetry=self.working_xrs.crystal_symmetry(),
      grm=self.working_grm,
      rama_manager=self.rama_manager,
      rotamer_manager=self.rotamer_manager,
      log=self.log,
      verbose=True)
  self.log.flush()
  # STOP()
  self.shift_and_write_result(
      hierarchy=loop_ideal.resulting_pdb_h,
      fname_suffix="rama_ideal",
      grm=self.working_grm)
  self.after_loop_idealization = geometry_no_grm(
      pdb_hierarchy=iotbx.pdb.input(
        source_info=None,
        lines=loop_ideal.resulting_pdb_h.as_pdb_string()).construct_hierarchy(),
      molprobity_scores=True)

  # fixing remaining rotamer outliers
  fixed_rot_pdb_h = loop_ideal.resulting_pdb_h.deep_copy()
  fixed_rot_pdb_h.reset_atom_i_seqs()
  if (self.params.additionally_fix_rotamer_outliers and
      self.after_loop_idealization.rotamer_outliers > 0.004):
    print >> self.log, "Processing pdb file again for fixing rotamers..."
    self.log.flush()
    print >> self.log, "Fixing rotamers..."
    self.log.flush()
    self.shift_and_write_result(
        hierarchy=fixed_rot_pdb_h,
        fname_suffix="just_before_rota")
    fixed_rot_pdb_h = fix_rotamer_outliers(
        pdb_hierarchy=fixed_rot_pdb_h,
        grm=self.working_grm.geometry,
        xrs=self.working_xrs,
        map_data=self.reference_map,
        mon_lib_srv=self.mon_lib_srv,
        rotamer_manager=self.rotamer_manager,
        verbose=True)
    self.shift_and_write_result(
        hierarchy=fixed_rot_pdb_h,
        fname_suffix="rota_ideal",
        grm=self.working_grm)
  # NOTE(review): cs_to_write is never read in this method.
  cs_to_write = self.cs if self.shift_vector is None else None
  self.after_rotamer_fixing = geometry_no_grm(
      pdb_hierarchy=iotbx.pdb.input(
        source_info=None,
        lines=fixed_rot_pdb_h.as_pdb_string()).construct_hierarchy(),
      molprobity_scores=True)

  # Reference model for the final minimization: the stored original boxed
  # hierarchy, or the current whole model when the parameter is off.
  ref_hierarchy_for_final_gm = self.original_boxed_hierarchy
  if not self.params.use_starting_model_for_final_gm:
    ref_hierarchy_for_final_gm = self.whole_pdb_h
  ref_hierarchy_for_final_gm.reset_atom_i_seqs()
  if self.params.additionally_fix_rotamer_outliers:
    ssb.set_xyz_smart(self.working_pdb_h, fixed_rot_pdb_h)

  if self.using_ncs:
    print >> self.log, "Using ncs"
    # multiply back and do geometry_minimization for the whole molecule
    # NOTE(review): this iterates the unfiltered group list, while the
    # master selection above was built from the filtered one -- confirm.
    for ncs_gr in ncs_restr_group_list:
      master_h = self.whole_pdb_h.select(ncs_gr.master_iselection)
      for c in ncs_gr.copies:
        new_sites = master_h.atoms().extract_xyz()
        # Each copy's rotation and translation are applied to the master
        # coordinates.
        new_c_sites = c.r.elems * new_sites + c.t
        self.whole_pdb_h.select(c.iselection).atoms().set_xyz(new_c_sites)
    self.log.flush()
  else:
    # still need to run gm if rotamers were fixed
    print >> self.log, "Not using ncs"

  # need to update SS manager for the whole model here.
  if self.params.use_ss_restraints:
    ss_manager = manager(
        pdb_hierarchy=self.whole_pdb_h,
        geometry_restraints_manager=self.whole_grm.geometry,
        sec_str_from_pdb_file=self.filtered_whole_ann,
        params=None,
        mon_lib_srv=self.mon_lib_srv,
        verbose=-1,
        log=self.log)
    self.whole_grm.geometry.set_secondary_structure_restraints(
        ss_manager=ss_manager,
        hierarchy=self.whole_pdb_h,
        log=self.log)
  print >> self.log, "loop_ideal.ref_exclusion_selection", loop_ideal.ref_exclusion_selection
  print >> self.log, "Minimizing whole model"
  self.minimize(
      hierarchy=self.whole_pdb_h,
      xrs=self.whole_xrs,
      grm=self.whole_grm,
      ncs_restraints_group_list=filtered_ncs_restr_group_list,
      original_pdb_h=ref_hierarchy_for_final_gm,
      excl_string_selection=loop_ideal.ref_exclusion_selection,
      ss_annotation=self.ann,
      reference_map = self.reference_map)
  self.shift_and_write_result(
      hierarchy=self.whole_pdb_h,
      fname_suffix="all_idealized",
      grm=self.whole_grm)
  self.final_model_statistics = geometry_no_grm(
      pdb_hierarchy=iotbx.pdb.input(
        source_info=None,
        lines=self.whole_pdb_h.as_pdb_string()).construct_hierarchy(),
      molprobity_scores=True)
  # self.original_boxed_hierarchy.write_pdb_file(file_name="original_boxed_end.pdb")
  self.time_for_run = time() - t_0
def __init__(self, pdb_hierarchy, params=None, secondary_structure_annotation=None, reference_map=None, crystal_symmetry=None, grm=None, rama_manager=None, rotamer_manager=None, log=null_out(), verbose=False):
  """Run CCD loop idealization trials until Ramachandran outliers are gone.

  Trials repeat while fewer than ``params.number_of_ccd_trials`` were made,
  ``params.enabled`` is true and the outlier percentage recorded after
  minimization is above 0.001.  Each trial idealizes every chain through
  ``self.idealize_chain``, re-reads the model from its PDB string and, when
  ``params.minimize_whole`` is set, minimizes it (against the map when one
  is available, otherwise with the Ramachandran wrapper).

  Parameters:
    pdb_hierarchy: single-model hierarchy; kept as ``self.original_pdb_h``,
      work is done on a deep copy (``self.resulting_pdb_h``).
    params: passed through ``self.process_params``.
    secondary_structure_annotation: forwarded to the minimization wrappers.
    reference_map, crystal_symmetry: must be given together; if only one of
      them is supplied the map is dropped and a message is logged.
    grm: restraints manager forwarded to the minimization wrappers.
    rama_manager: defaults to ``rama_eval()`` when None.
    rotamer_manager: defaults to ``RotamerEval()`` when None.
    log: stream that receives progress messages.
    verbose: stored as ``self.verbose``.

  Raises:
    Sorry: if ``pdb_hierarchy`` has more than one model.

  NOTE(review): statement nesting was reconstructed; confirm against version
  control, in particular whether the post-minimization statistics belong
  inside ``if self.params.minimize_whole`` as written here.
  """
  if len(pdb_hierarchy.models()) > 1:
    raise Sorry("Multi-model files are not supported")
  self.original_pdb_h = pdb_hierarchy
  self.secondary_structure_annotation = secondary_structure_annotation
  asc = pdb_hierarchy.atom_selection_cache()
  self.xrs = pdb_hierarchy.extract_xray_structure(
      crystal_symmetry=crystal_symmetry)
  self.reference_map = reference_map
  self.resulting_pdb_h = pdb_hierarchy.deep_copy()
  self.resulting_pdb_h.reset_atom_i_seqs()
  self.params = self.process_params(params)
  self.log = log
  self.verbose = verbose
  self.grm = grm
  self.r = rama_manager
  if self.r is None:
    self.r = rama_eval()
  self.rotamer_manager = rotamer_manager
  if self.rotamer_manager is None:
    self.rotamer_manager = RotamerEval()
  ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
  self.p_initial_rama_outliers = ram.out_percent
  self.p_before_minimization_rama_outliers = None
  self.p_after_minimiaztion_rama_outliers = None
  # The map is usable only together with crystal symmetry.
  n_inputs = [reference_map, crystal_symmetry].count(None)
  if not (n_inputs == 0 or n_inputs == 2):
    print >> log, "Need to have both map and symmetry info. Not using map."
    self.reference_map = None

  # Outliers are counted as newline characters in the listing returned by
  # utils.list_rama_outliers_h and expressed as a percentage of residues.
  berkeley_count = utils.list_rama_outliers_h(
      self.resulting_pdb_h).count("\n")
  self.berkeley_p_before_minimization_rama_outliers = \
      berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
  # self.berkeley_p_before_minimization_rama_outliers = None
  # Seeds the loop condition below before any minimization happened.
  self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers
  self.ref_exclusion_selection = ""
  number_of_ccd_trials = 0
  # print "logic expr outcome:", (number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001)
  # print number_of_ccd_trials < 10
  # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers
  if self.berkeley_p_before_minimization_rama_outliers <= 0.001:
    print >> self.log, "No ramachandran outliers, skipping CCD step."
  if not self.params.enabled:
    print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
  while (number_of_ccd_trials < self.params.number_of_ccd_trials
      and self.berkeley_p_after_minimiaztion_rama_outliers > 0.001
      and self.params.enabled):
    # NOTE(review): this message goes to stdout, not to self.log.
    print "CCD try number, outliers:", number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
    number_of_ccd_trials += 1
    processed_chain_ids = []
    for chain in self.resulting_pdb_h.only_model().chains():
      print >> self.log, "Idealizing chain %s" % chain.id
      # A chain id is processed once even if several chain objects share it.
      if chain.id not in processed_chain_ids:
        processed_chain_ids.append(chain.id)
      else:
        continue
      selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
      sel = asc.selection("chain %s" % chain.id)
      chain_h = self.resulting_pdb_h.select(sel)
      m = chain_h.only_model()
      i = 0
      cutted_chain_h = None
      # Only the first chain object of the selection is processed; the
      # others are removed from the selected copy.
      for c in m.chains():
        if i == 0:
          cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(
              c)
        else:
          print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
          print >> self.log, " Removing chain %s with %d residues" % (
              c.id, len(c.residues()))
          m.remove_chain(c)
        i += 1
      exclusions, ch_h = self.idealize_chain(
          hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
      if ch_h is not None:
        set_xyz_smart(
            # dest_h=self.resulting_pdb_h,
            dest_h=chain,
            source_h=ch_h)
        for resnum in exclusions:
          selection += " and not resseq %s" % resnum
      self.ref_exclusion_selection += "(%s) or " % selection
    #
    # dumping and reloading hierarchy to do proper rounding of coordinates
    self.resulting_pdb_h = iotbx.pdb.input(
        source_info=None,
        lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
    berkeley_count = utils.list_rama_outliers_h(
        self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    # NOTE(review): the selection string is never reset between trials, so
    # from the second trial on it reads "(a) (b) " with no operator between
    # the groups -- confirm the selection parser accepts that.
    if len(self.ref_exclusion_selection) > 0:
      self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
    # self.resulting_pdb_h.write_pdb_file(file_name="%s_before_minimization.pdb" % self.params.output_prefix)
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_before_minimization_rama_outliers = ram.out_percent
    duke_count = ram.get_outliers_count_and_fraction()[0]
    if berkeley_count != duke_count:
      print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count

    if self.params.minimize_whole:
      print >> self.log, "minimizing whole thing..."
      print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
      # print >> sel
      if self.reference_map is None:
        minimize_wrapper_for_ramachandran(
            hierarchy=self.resulting_pdb_h,
            xrs=self.xrs,
            original_pdb_h=self.original_pdb_h,
            excl_string_selection=self.ref_exclusion_selection,
            grm=self.grm,
            log=None,
            ss_annotation=self.secondary_structure_annotation)
      else:
        mwwm = minimize_wrapper_with_map(
            pdb_h=self.resulting_pdb_h,
            xrs=self.xrs,
            target_map=self.reference_map,
            grm=self.grm,
            ss_annotation=self.secondary_structure_annotation,
            log=self.log)
      # self.resulting_pdb_h.write_pdb_file(file_name="%s_all_minized.pdb" % self.params.output_prefix)
      # Statistics that drive the next evaluation of the loop condition.
      ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
      self.p_after_minimiaztion_rama_outliers = ram.out_percent
      berkeley_count = utils.list_rama_outliers_h(
          self.resulting_pdb_h).count("\n")
      duke_count = ram.get_outliers_count_and_fraction()[0]
      self.berkeley_p_after_minimiaztion_rama_outliers = \
          berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
      if berkeley_count != duke_count:
        print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
      else:
        print >> self.log, "Number of Rama outliers after min:", berkeley_count
def __init__(self, pdb_hierarchy, params=None, secondary_structure_annotation=None, log=null_out(), verbose=True):
  """Run CCD loop idealization trials until Ramachandran outliers are gone.

  Trials repeat while fewer than ``params.number_of_ccd_trials`` were made,
  ``params.enabled`` is true and the outlier percentage measured after the
  CCD step is above 0.001.  Each trial idealizes every chain through
  ``self.idealize_chain``, re-reads the model from its PDB string, writes
  "<output_prefix>_before_minimization.pdb" and, when
  ``params.minimize_whole`` is set, minimizes the model.

  Parameters:
    pdb_hierarchy: single-model hierarchy; kept as ``self.original_pdb_h``,
      work is done on a deep copy (``self.resulting_pdb_h``).
    params: passed through ``self.process_params``.
    secondary_structure_annotation: forwarded to
      ``minimize_wrapper_for_ramachandran``.
    log: stream that receives progress messages.
    verbose: stored as ``self.verbose``.

  Raises:
    Sorry: if ``pdb_hierarchy`` has more than one model.

  NOTE(review): statement nesting was reconstructed; confirm against version
  control, in particular whether the post-minimization statistics belong
  inside ``if self.params.minimize_whole`` as written here.
  """
  if len(pdb_hierarchy.models()) > 1:
    raise Sorry("Multi-model files are not supported")
  self.original_pdb_h = pdb_hierarchy
  self.secondary_structure_annotation=secondary_structure_annotation
  xrs = pdb_hierarchy.extract_xray_structure()
  asc = pdb_hierarchy.atom_selection_cache()
  self.resulting_pdb_h = pdb_hierarchy.deep_copy()
  self.resulting_pdb_h.reset_atom_i_seqs()
  self.params = self.process_params(params)
  self.log = log
  self.verbose = verbose
  self.r = rama_eval()
  self.rotamer_manager = RotamerEval()
  ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
  self.p_initial_rama_outliers = ram.out_percent
  self.p_before_minimization_rama_outliers = None
  self.p_after_minimiaztion_rama_outliers = None
  # Outliers are counted as newline characters in the listing returned by
  # utils.list_rama_outliers_h and expressed as a percentage of residues.
  berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
  self.berkeley_p_before_minimization_rama_outliers = \
      berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
  # self.berkeley_p_before_minimization_rama_outliers = None
  self.berkeley_p_after_minimiaztion_rama_outliers = None
  self.ref_exclusion_selection = ""
  number_of_ccd_trials = 0
  # print "logic expr outcome:", (number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001)
  # print number_of_ccd_trials < 10
  # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers
  if self.berkeley_p_before_minimization_rama_outliers <= 0.001:
    print >> self.log, "No ramachandran outliers, skipping CCD step."
  if not self.params.enabled:
    print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
  while (number_of_ccd_trials < self.params.number_of_ccd_trials
      and self.berkeley_p_before_minimization_rama_outliers > 0.001
      and self.params.enabled):
    # NOTE(review): this message goes to stdout, not to self.log.
    print "CCD try number, outliers:", number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
    number_of_ccd_trials += 1
    processed_chain_ids = []
    for chain in self.resulting_pdb_h.only_model().chains():
      print >> self.log, "Idealizing chain %s" % chain.id
      # A chain id is processed once even if several chain objects share it.
      if chain.id not in processed_chain_ids:
        processed_chain_ids.append(chain.id)
      else:
        continue
      selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
      sel = asc.selection("chain %s" % chain.id)
      chain_h = self.resulting_pdb_h.select(sel)
      m = chain_h.only_model()
      i = 0
      cutted_chain_h = None
      # Only the first chain object of the selection is processed; the
      # others are removed from the selected copy.
      for c in m.chains():
        if i == 0:
          cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
        else:
          print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
          print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
          m.remove_chain(c)
        i += 1
      exclusions, ch_h = self.idealize_chain(
          hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
      if ch_h is not None:
        set_xyz_smart(
            # dest_h=self.resulting_pdb_h,
            dest_h=chain,
            source_h=ch_h)
        for resnum in exclusions:
          selection += " and not resseq %s" % resnum
      self.ref_exclusion_selection += "(%s) or " % selection
    #
    # dumping and reloading hierarchy to do proper rounding of coordinates
    self.resulting_pdb_h = iotbx.pdb.input(
        source_info=None,
        lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
    berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    # NOTE(review): the selection string is never reset between trials, so
    # from the second trial on it reads "(a) (b) " with no operator between
    # the groups -- confirm the selection parser accepts that.
    if len(self.ref_exclusion_selection) > 0:
      self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
    self.resulting_pdb_h.write_pdb_file(file_name="%s_before_minimization.pdb" % self.params.output_prefix)
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_before_minimization_rama_outliers = ram.out_percent
    duke_count = ram.get_outliers_count_and_fraction()[0]
    if berkeley_count != duke_count:
      print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count

    if self.params.minimize_whole:
      print >> self.log, "minimizing whole thing..."
      print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
      # print >> sel
      minimize_wrapper_for_ramachandran(
          hierarchy=self.resulting_pdb_h,
          xrs=xrs,
          original_pdb_h=self.original_pdb_h,
          excl_string_selection=self.ref_exclusion_selection,
          log=None,
          ss_annotation=self.secondary_structure_annotation)
      # self.resulting_pdb_h.write_pdb_file(file_name="%s_all_minized.pdb" % self.params.output_prefix)
      ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
      self.p_after_minimiaztion_rama_outliers = ram.out_percent
      berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
      duke_count = ram.get_outliers_count_and_fraction()[0]
      self.berkeley_p_after_minimiaztion_rama_outliers = \
          berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
      if berkeley_count != duke_count:
        print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
      else:
        print >> self.log, "Number of Rama outliers after min:", berkeley_count