def benchmark_structure(pdb_in, mon_lib_srv, ener_lib, verbose=False, w=1.0):
    """Minimize a structure twice and collect geometry / Ramachandran statistics.

    A model (with restraints manager) is built from ``pdb_in``.  Two copies of
    the starting coordinates are minimized with LBFGS (at most 500
    iterations each): the first with the geometry restraints as built, the
    second after a Ramachandran manager created from the default
    ``ramachandran`` parameters has been installed on the restraints manager.

    ``mon_lib_srv``, ``ener_lib``, ``verbose`` and ``w`` are accepted but not
    read by this implementation.

    Returns a ``group_args`` holding angle values ``a0``-``a2``, bond values
    ``b0``-``b2`` and ``ramalyze`` results ``r0``-``r2`` for the starting
    model, the first minimization and the second minimization.
    """
    interp = mmtbx.model.manager.get_default_pdb_interpretation_params()
    interp.pdb_interpretation.peptide_link.ramachandran_restraints = True
    plot_restraints = interp.pdb_interpretation.ramachandran_plot_restraints
    plot_restraints.inject_emsley8k_into_oldfield_favored = False
    model = mmtbx.model.manager(
        model_input=pdb_in,
        pdb_interpretation_params=interp,
        log=null_out(),
        build_grm=True)
    grm = model.get_restraints_manager().geometry
    hierarchy = model.get_hierarchy()

    def analyze():
        # Ramachandran analysis of whatever coordinates the hierarchy holds now.
        return ramalyze(pdb_hierarchy=hierarchy, outliers_only=False)

    def minimize(sites):
        # One LBFGS run on ``sites``; returns (rmsd_angles, rmsd_bonds).
        run = geometry_minimization.lbfgs(
            sites_cart=sites,
            correct_special_position_tolerance=1.0,
            geometry_restraints_manager=grm,
            geometry_restraints_flags=flags,
            lbfgs_termination_params=scitbx.lbfgs.termination_parameters(
                max_iterations=500))
        return run.rmsd_angles, run.rmsd_bonds

    r0 = analyze()
    atoms = hierarchy.atoms()
    start_1 = atoms.extract_xyz().deep_copy()
    start_2 = start_1.deep_copy()
    assert grm is not None

    initial = grm.energies_sites(sites_cart=start_1)
    b0 = initial.bond_deviations()[-1]
    a0 = initial.angle_deviations()[-1]
    flags = cctbx.geometry_restraints.flags.flags(default=True)

    # First pass: restraints manager exactly as built by the model manager.
    a1, b1 = minimize(start_1)
    atoms.set_xyz(start_1)
    r1 = analyze()

    # Second pass: install a freshly built Ramachandran manager first.
    rama_log = StringIO()
    rama_params = (
        ramachandran.master_phil.fetch().extract().ramachandran_plot_restraints)
    grm.set_ramachandran_restraints(
        ramachandran.ramachandran_manager(hierarchy, rama_params, rama_log))
    a2, b2 = minimize(start_2)
    atoms.set_xyz(start_2)
    r2 = analyze()

    return group_args(
        a0=a0, a1=a1, a2=a2,
        b0=b0, b1=b1, b2=b2,
        r0=r0, r1=r1, r2=r2)
def benchmark_structure(pdb_in, mon_lib_srv, ener_lib, verbose=False, w=1.0):
    """Minimize a structure with and without Ramachandran restraints.

    ``pdb_in`` is processed with ``pdb_interpretation.process`` using the
    supplied monomer library server and energy library.  The starting
    coordinates are copied twice; the first copy is minimized with the
    geometry restraints manager as obtained from the processed file, the
    second after a Ramachandran manager has been set on it.

    ``verbose`` and ``w`` are accepted but not read.

    Returns a ``group_args`` with angle values ``a0``-``a2``, bond values
    ``b0``-``b2`` and ``ramalyze`` results ``r0``-``r2`` (start, first
    minimization, second minimization).
    """
    processed = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=pdb_interpretation.master_params.extract(),
        pdb_inp=pdb_in,
        log=StringIO())
    rama_log = StringIO()
    hierarchy = processed.all_chain_proxies.pdb_hierarchy

    def _rama_stats():
        # ramalyze on the hierarchy's current coordinates.
        return ramalyze(pdb_hierarchy=hierarchy, outliers_only=False)

    def _relax(sites):
        # Run LBFGS on ``sites`` and report (rmsd_angles, rmsd_bonds).
        minimized = geometry_minimization.lbfgs(
            sites_cart=sites,
            correct_special_position_tolerance=1.0,
            geometry_restraints_manager=grm,
            geometry_restraints_flags=flags,
            lbfgs_termination_params=scitbx.lbfgs.termination_parameters(
                max_iterations=500))
        return minimized.rmsd_angles, minimized.rmsd_bonds

    r0 = _rama_stats()
    atoms = hierarchy.atoms()
    coords_plain = atoms.extract_xyz().deep_copy()
    coords_rama = coords_plain.deep_copy()
    grm = processed.geometry_restraints_manager()
    assert grm is not None

    energies = grm.energies_sites(sites_cart=coords_plain)
    b0 = energies.bond_deviations()[-1]
    a0 = energies.angle_deviations()[-1]
    flags = cctbx.geometry_restraints.flags.flags(default=True)

    a1, b1 = _relax(coords_plain)
    atoms.set_xyz(coords_plain)
    r1 = _rama_stats()

    rama_params = ramachandran.master_phil.fetch().extract()
    grm.set_ramachandran_restraints(
        ramachandran.ramachandran_manager(
            hierarchy, None, rama_params, rama_log))

    a2, b2 = _relax(coords_rama)
    atoms.set_xyz(coords_rama)
    r2 = _rama_stats()

    return group_args(
        a0=a0, a1=a1, a2=a2,
        b0=b0, b1=b1, b2=b2,
        r0=r0, r1=r1, r2=r2)
def benchmark_structure(pdb_in, mon_lib_srv, ener_lib, verbose=False, w=1.0):
    """Collect before/after statistics for two geometry minimizations.

    The input is processed with ``pdb_interpretation.process``; the same
    starting coordinates are then minimized twice with LBFGS (500 iterations
    maximum), the second time after Ramachandran restraints have been set on
    the geometry restraints manager.

    ``verbose`` and ``w`` are accepted but not read.

    Returns a ``group_args`` with ``a0``/``b0``/``r0`` for the starting model
    and ``a1``/``b1``/``r1`` and ``a2``/``b2``/``r2`` for the first and
    second minimization respectively.
    """
    interpretation_params = pdb_interpretation.master_params.extract()
    processed_pdb_file = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=interpretation_params,
        pdb_inp=pdb_in,
        log=StringIO())
    log = StringIO()
    pdb_hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
    rama_kwargs = dict(pdb_hierarchy=pdb_hierarchy, outliers_only=False)

    r0 = ramalyze(**rama_kwargs)
    atoms = pdb_hierarchy.atoms()
    sites_first = atoms.extract_xyz().deep_copy()
    sites_second = sites_first.deep_copy()
    grm = processed_pdb_file.geometry_restraints_manager()
    assert grm is not None

    start_energies = grm.energies_sites(sites_cart=sites_first)
    b0 = start_energies.bond_deviations()[-1]
    a0 = start_energies.angle_deviations()[-1]

    # Keyword arguments shared by both minimizer invocations.
    lbfgs_kwargs = dict(
        correct_special_position_tolerance=1.0,
        geometry_restraints_manager=grm,
        geometry_restraints_flags=cctbx.geometry_restraints.flags.flags(
            default=True))

    first = geometry_minimization.lbfgs(
        sites_cart=sites_first,
        lbfgs_termination_params=scitbx.lbfgs.termination_parameters(
            max_iterations=500),
        **lbfgs_kwargs)
    a1, b1 = first.rmsd_angles, first.rmsd_bonds
    atoms.set_xyz(sites_first)
    r1 = ramalyze(**rama_kwargs)

    rama_manager = ramachandran.ramachandran_manager(
        pdb_hierarchy, None, ramachandran.master_phil.fetch().extract(), log)
    grm.set_ramachandran_restraints(rama_manager)

    second = geometry_minimization.lbfgs(
        sites_cart=sites_second,
        lbfgs_termination_params=scitbx.lbfgs.termination_parameters(
            max_iterations=500),
        **lbfgs_kwargs)
    a2, b2 = second.rmsd_angles, second.rmsd_bonds
    atoms.set_xyz(sites_second)
    r2 = ramalyze(**rama_kwargs)

    return group_args(
        a0=a0, a1=a1, a2=a2,
        b0=b0, b1=b1, b2=b2,
        r0=r0, r1=r1, r2=r2)
def __init__(self, model1, model2, params=None, log=null_out()):
    """Pair up the Ramachandran results of two models and record differences.

    ``ramalyze`` is run on both hierarchies and the two result lists are
    walked in parallel.  Residues whose ``id_str()`` match contribute one
    tuple to ``self.results``; when they do not match, one side is advanced
    and the unmatched residue is recorded in ``self.skipped_1`` or
    ``self.skipped_2``.

    Parameters:
      model1, model2: objects providing ``get_hierarchy()``.
      params: comparison parameters; when None,
        ``rcompare.get_default_params().comparama`` is used.
      log: accepted for interface compatibility; not read here.

    Raises:
      AssertionError: if the two distance computations for a matched
        residue disagree (``approx_equal`` check).
    """
    self.plots = None
    self.params = params
    if self.params is None:
        self.params = rcompare.get_default_params().comparama
    self.rama1 = ramalyze(model1.get_hierarchy(), out=null_out())
    self.rama2 = ramalyze(model2.get_hierarchy(), out=null_out())
    self.results = []
    self.skipped_1 = []
    self.skipped_2 = []
    # Walk both result lists in step, trying to recover when one or several
    # residues are missing from either model.
    i1 = i2 = 0
    while i1 < len(self.rama1.results) and i2 < len(self.rama2.results):
        r1 = self.rama1.results[i1]
        r2 = self.rama2.results[i2]
        residue_id = r1.id_str()
        if residue_id == r2.id_str():
            v = determine_validation_change_text(r1, r2)
            # The previously computed plain Euclidean difference was never
            # used and has been dropped; only the get_distance()-based value
            # is stored.
            diff2 = math.sqrt(
                get_distance(r1.phi, r2.phi)**2 +
                get_distance(r1.psi, r2.psi)**2)
            # Cross-check against the two_rama_points implementation.
            diff3 = two_rama_points(
                (r1.phi, r1.psi), (r2.phi, r2.psi)).min_length()
            assert approx_equal(diff2, diff3), "%s, %s" % (
                (r1.phi, r1.psi), (r2.phi, r2.psi))
            self.results.append(
                (residue_id, diff2, r1.phi, r1.psi, r2.phi, r2.psi, v,
                 r2.res_type, r1.score / 100, r2.score / 100))
            i1 += 1
            i2 += 1
        else:
            # Figure out which side to skip.
            if r1.chain_id == r2.chain_id:
                skip_1 = r1.resseq_as_int() < r2.resseq_as_int()
            else:
                skip_1 = r1.resseq_as_int() > r2.resseq_as_int()
            if skip_1:
                i1 += 1
                self.skipped_1.append(r1)
            else:
                i2 += 1
                self.skipped_2.append(r2)
    self.res_columns = None
    if self.results:
        self.res_columns = list(zip(*self.get_results()))
def __init__(self, pdb_hierarchy, params=None, log=null_out(), verbose=True):
    """Idealize each chain of a single-model hierarchy, then optionally minimize.

    The percentage of Ramachandran outliers (``ram.out_percent``) is stored
    at three points: for the input (``p_initial_rama_outliers``), after the
    per-chain idealization (``p_before_minimization_rama_outliers``) and,
    only when ``params.minimize_whole`` is set, after the whole-model
    minimization (``p_after_minimiaztion_rama_outliers``; the misspelling is
    the attribute's actual name).

    The idealized model is written to
    "<output_prefix>_before_minization.pdb".

    Raises:
      Sorry: if ``pdb_hierarchy`` contains more than one model.
    """
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    xrs = pdb_hierarchy.extract_xray_structure()
    asc = pdb_hierarchy.atom_selection_cache()
    # All coordinate changes are applied to this copy; the input is kept.
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.r = ramachandran_eval.RamachandranEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    # Accumulates one "(...) or " clause per chain; passed to
    # minimize_hierarchy() below.
    self.ref_exclusion_selection = ""
    for chain in pdb_hierarchy.only_model().chains():
        print >>self.log, "Idealizing chain %s" % chain.id
        selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
        sel = asc.selection("chain %s" % chain.id)
        chain_h = self.original_pdb_h.select(sel)
        m = chain_h.only_model()
        i = 0
        cutted_chain_h = None
        # The selection by chain id can yield several chain objects; only
        # the first one is turned into its own hierarchy, the rest are
        # removed from the selected model with a warning.
        for c in m.chains():
            if i == 0:
                cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
            else:
                print >>self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                print >>self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
                m.remove_chain(c)
            i += 1
        exclusions, ch_h = self.idealize_chain(hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
        if ch_h is not None:
            set_xyz_smart(self.resulting_pdb_h, ch_h)
            # Each residue number returned in `exclusions` is excluded from
            # this chain's selection string.
            for resnum in exclusions:
                selection += " and not resseq %s" % resnum
        self.ref_exclusion_selection += "(%s) or " % selection
    if len(self.ref_exclusion_selection) > 0:
        # Drop the last three characters ("or ") of the trailing separator.
        self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
    self.resulting_pdb_h.write_pdb_file(file_name="%s_before_minization.pdb" % self.params.output_prefix)
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_before_minimization_rama_outliers = ram.out_percent
    if self.params.minimize_whole:
        print >>self.log, "minimizing whole thing..."
        print >>self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
        minimize_hierarchy(self.resulting_pdb_h, xrs, self.original_pdb_h, self.ref_exclusion_selection, log=None)
        # self.resulting_pdb_h.write_pdb_file(file_name="%s_all_minized.pdb" % self.params.output_prefix)
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_after_minimiaztion_rama_outliers = ram.out_percent
def exclude_outliers_from_reference_restraints_selection(
        pdb_hierarchy, restraints_selection):
    """Clear Ramachandran and rotamer outliers from a restraints selection.

    ``restraints_selection`` must not be None.  Ramachandran outliers come
    from ``ramalyze(...).outlier_selection()``; rotamer outliers are the
    atoms of every residue that ``RotamerEval`` evaluates as "OUTLIER".
    Residue groups with more than one conformer are not rotamer-evaluated.

    Returns ``restraints_selection`` and-ed with the complement of the
    combined outlier selection.
    """
    from mmtbx.validation.ramalyze import ramalyze
    # the import below is SLOW!!!
    from mmtbx.rotamer.rotamer_eval import RotamerEval
    assert restraints_selection is not None
    n_sel = restraints_selection.size()

    # Ramachandran plot outliers, expanded to a boolean mask.
    rama_result = ramalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False)
    rama_mask = flex.bool(n_sel, rama_result.outlier_selection())

    # Rotamer outliers, gathered as atom i_seqs first.
    rota_i_seqs = flex.size_t()
    evaluator = RotamerEval()  # SLOW!!!
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                conformers = residue_group.conformers()
                if len(conformers) <= 1:
                    for conformer in conformers:
                        residue = conformer.only_residue()
                        verdict = evaluator.evaluate_residue(residue)
                        if verdict == "OUTLIER":
                            rota_i_seqs.extend(
                                residue.atoms().extract_i_seq())
    rota_mask = flex.bool(n_sel, rota_i_seqs)

    any_outlier = rama_mask | rota_mask
    return restraints_selection & (~any_outlier)
def exclude_outliers_from_reference_restraints_selection(
        pdb_hierarchy, restraints_selection):
    """Return ``restraints_selection`` without Ramachandran/rotamer outliers.

    Two outlier masks of the same size as ``restraints_selection`` are
    built: one from ``ramalyze`` and one from per-residue ``RotamerEval``
    calls (only for residue groups that do not have more than one
    conformer).  Atoms flagged in either mask are cleared from the returned
    selection.
    """
    from mmtbx.validation.ramalyze import ramalyze
    # the import below is SLOW!!!
    from mmtbx.rotamer.rotamer_eval import RotamerEval
    assert restraints_selection is not None

    def rotamer_outlier_i_seqs():
        # Collect i_seqs of all atoms in residues evaluated as "OUTLIER".
        collected = flex.size_t()
        rotamer_eval = RotamerEval()  # SLOW!!!
        for mdl in pdb_hierarchy.models():
            for chn in mdl.chains():
                for rg in chn.residue_groups():
                    confs = rg.conformers()
                    if len(confs) > 1:
                        # Alternative conformations are not evaluated.
                        continue
                    for conf in confs:
                        res = conf.only_residue()
                        if rotamer_eval.evaluate_residue(res) == "OUTLIER":
                            collected.extend(res.atoms().extract_i_seq())
        return collected

    # ramachandran plot outliers
    rama_outliers = ramalyze(
        pdb_hierarchy=pdb_hierarchy,
        outliers_only=False).outlier_selection()
    rama_outliers = flex.bool(restraints_selection.size(), rama_outliers)
    # rotamer outliers
    rota_outliers = flex.bool(
        restraints_selection.size(), rotamer_outlier_i_seqs())
    return restraints_selection & (~(rama_outliers | rota_outliers))
def _write_plots_if_needed(self, model, label, type_of_plot='whole'):
    """Save Ramachandran plots as PNG files when the parameters ask for it.

    Two attributes of ``self.params`` are consulted,
    ``write_<type_of_plot>_plot`` and ``write_<type_of_plot>_general_only``.
    Nothing is done unless the first is true.  Otherwise ``ramalyze`` is run
    on the model, the plots are stored on ``self.plots`` and either all six
    plots or only plot 0 (when "general only" is set) are written.

    Raises:
      Sorry: if a target file exists and ``self.params.output.overwrite``
        is false.
    """
    flag_base = "write_%s" % type_of_plot
    wanted = getattr(self.params, flag_base + "_plot")
    general_only = getattr(self.params, flag_base + "_general_only")
    if not wanted:
        return

    self.rama = ramalyze(model.get_hierarchy(), out=null_out())
    self.plots = self.rama.get_plots(
        show_labels=False,
        point_style='.',
        markersize=3,
        markeredgecolor="red",
        dpi=300,
        markerfacecolor="yellow")

    plot_indices = [0] if general_only else range(6)
    for idx in plot_indices:
        # "/" in a residue-type label is replaced before it goes into a name.
        suffix = res_type_labels[idx].replace("/", "_")
        base_name = self.get_default_output_filename(
            prefix='%s_%s_' % (self.inp_fn, label),
            suffix=suffix,
            serial=Auto)
        fn = "%s.png" % base_name
        if os.path.isfile(fn) and not self.params.output.overwrite:
            raise Sorry(
                "%s already exists and overwrite is set to False." % fn)
        print("Saving:", fn, file=self.logger)
        self.plots[idx].save_image(fn, dpi=300)
def validate_single_model(self, i_model):
    """Run Ramachandran and rotamer validation on one stored hierarchy.

    ``i_model`` indexes ``self.pdb_hierarchies``.  Returns the tuple
    ``(ramalyze result, rotalyze result)``, both computed with
    ``outliers_only=False``.
    """
    common = dict(
        pdb_hierarchy=self.pdb_hierarchies[i_model],
        outliers_only=False)
    rama = ramalyze.ramalyze(**common)
    rota = rotalyze.rotalyze(**common)
    return rama, rota
def validate_single_model(self, i_model):
    """Validate the hierarchy at index ``i_model``.

    Returns a 2-tuple: the ``ramalyze`` result followed by the ``rotalyze``
    result for ``self.pdb_hierarchies[i_model]``; neither is restricted to
    outliers.
    """
    selected = self.pdb_hierarchies[i_model]
    return (
        ramalyze.ramalyze(pdb_hierarchy=selected, outliers_only=False),
        rotalyze.rotalyze(pdb_hierarchy=selected, outliers_only=False),
    )
def ramachandran(self):
    """Return Ramachandran percentages, running ``ramalyze`` at most once.

    The ``ramalyze`` result is stored in ``self.cached_rama`` on first use
    and reused afterwards.  The returned ``group_args`` carries
    ``outliers``, ``allowed`` and ``favored`` percentages plus the
    ``ramalyze`` object itself.
    """
    rama = self.cached_rama
    if rama is None:
        rama = ramalyze(
            pdb_hierarchy=self.pdb_hierarchy,
            outliers_only=False)
        self.cached_rama = rama
    return group_args(
        outliers=rama.percent_outliers,
        allowed=rama.percent_allowed,
        favored=rama.percent_favored,
        ramalyze=rama,  # XXX Bulky object -- REMOVE!
    )
def exercise():
    """Check that ``ramalyze`` yields three results for ``pdb_str``.

    The assertion message states that this is expected to fail until
    ramalyze results are available for all models.
    """
    model = mmtbx.model.manager(
        model_input=iotbx.pdb.input(
            lines=pdb_str.split("\n"),
            source_info=None))
    rama = ramalyze.ramalyze(
        pdb_hierarchy=model.get_hierarchy(),
        outliers_only=False)
    n_results = len(rama.results)
    assert n_results == 3, (
        'Supposed to fail until fixed. Ramalyze results not available for all models.')
def exercise_1(prefix="tst_add_arrows_on_plot_1"):
    """Draw test arrows on a Ramachandran plot and compare its histogram.

    The first plot returned by ``ramalyze(...).get_plots`` gets a fixed set
    of arrows (including ones that wrap around the plot edges), is saved as
    "<prefix>.png", and the image histogram is compared bin by bin with
    ``reference_hist``.  A bin matches when the values are equal or their
    ratio lies strictly between 0.98 and 1.02.

    A produced bin of 0 paired with a non-zero reference bin is counted as a
    mismatch; previously it raised ZeroDivisionError before the comparison
    could finish.

    Raises:
      AssertionError: if any histogram bin differs from the reference.
    """
    model = mmtbx.model.manager(
        model_input=iotbx.pdb.input(source_info=None, lines=pdb_str))
    rama = ramalyze(model.get_hierarchy(), out=null_out())
    plots = rama.get_plots(
        show_labels=True,
        point_style='bo',
        markersize=1,
        markeredgecolor="black",
        dpi=300,
        markerfacecolor="white")
    ad_testing = [
        ((60, -120), (120, -120)),
        ((-125, 120), (-125, 179)),
        ((-120, 120), (-120, -120)),   # wrapping up
        ((-115, -120), (-115, 120)),   # wrapping down
        ((120, -60), (-120, -60)),     # wrapping right
        ((-120, -65), (120, -65)),     # wrapping left
        ((120, 0), (-120, 60)),        # diag right
        ((-120, 55), (120, -5)),       # diag left
        ((-60, 120), (0, -120)),       # diag up
        ((5, -120), (-55, 120)),       # diag up
        ((150, 150), (-150, -150)),    # going to top right corner straight
        ((140, 155), (-130, -140)),    # going to top right corner not straight
        ((150, -150), (-150, 150)),    # going to bottom right corner straight
        ((140, -155), (-130, 140)),    # going to bottom right corner not straight
        ((-150, 150), (150, -150)),    # going to top left corner straight
        ((-140, 155), (130, -140)),    # going to top left corner not straight
        ((-150, -150), (150, 150)),    # going to bottom left corner straight
        ((-140, -155), (130, 140)),    # going to bottom left corner not straight
    ]
    plot = plots[0]
    add_arrows_on_plot(plot, ad_testing, color="red")
    plot_file_name = "%s.png" % prefix
    plot.save_image(plot_file_name, dpi=300)
    img = Image.open(plot_file_name)
    hist = img.histogram()

    mismatches = []
    for bin_index, (ref, res) in enumerate(zip(reference_hist, hist)):
        if ref == res:
            continue
        # res == 0 with ref != 0 cannot be within tolerance and must not be
        # divided by.
        if res == 0 or not (0.98 < ref / res < 1.02):
            mismatches.append((bin_index, ref, res))
    assert not mismatches, (
        "%d histogram bins differ from reference, first (bin, ref, res): %s"
        % (len(mismatches), (mismatches[:1] or [None])[0]))
def run_ramalyze(self):
    """Attach each ``ramalyze`` result to the matching residue record(s).

    For every result a residue key is derived through ``mdb_utils``.  When
    that key is present in ``self.residues`` the result is added to that one
    entry; otherwise the keys returned by ``self.get_alternate_keys`` are
    used (alternates likely exist).
    """
    from mmtbx.validation import ramalyze

    analysis = ramalyze.ramalyze(self.hierarchy)
    for result in analysis.results:
        resd = mdb_utils.get_resd(self.pdb_code, result)
        reskey = mdb_utils.MDBResidue(**resd).get_residue_key()
        if reskey in self.residues:
            # No alternates
            target_keys = [reskey]
        else:
            # alternates likely exist
            target_keys = self.get_alternate_keys(resd)
        for key in target_keys:
            self.residues[key].add_ramalyze_result(result)
def exercise_1(prefix="tst_add_arrows_on_plot_1"):
    """Draw test arrows on a Ramachandran plot and compare the file's MD5.

    The first plot from ``ramalyze(...).get_plots`` receives a fixed set of
    black arrows, is saved as "<prefix>.png", and the MD5 hex digest of the
    written file must equal ``reference_md5``.
    """
    pdb_input = iotbx.pdb.input(source_info=None, lines=pdb_str)
    model = mmtbx.model.manager(model_input=pdb_input)
    rama = ramalyze(model.get_hierarchy(), out=null_out())
    plots = rama.get_plots(
        show_labels=True,
        point_style='bo',
        markersize=1,
        markeredgecolor="black",
        dpi=300,
        markerfacecolor="white")

    arrows = [
        ((60, -120), (120, -120)),
        ((-125, 120), (-125, 179)),
        ((-120, 120), (-120, -120)),   # wrapping up
        ((-115, -120), (-115, 120)),   # wrapping down
        ((120, -60), (-120, -60)),     # wrapping right
        ((-120, -65), (120, -65)),     # wrapping left
        ((120, 0), (-120, 60)),        # diag right
        ((-120, 55), (120, -5)),       # diag left
        ((-60, 120), (0, -120)),       # diag up
        ((5, -120), (-55, 120)),       # diag up
        ((150, 150), (-150, -150)),    # going to top right corner straight
        ((140, 155), (-130, -140)),    # going to top right corner not straight
        ((150, -150), (-150, 150)),    # going to bottom right corner straight
        ((140, -155), (-130, 140)),    # going to bottom right corner not straight
        ((-150, 150), (150, -150)),    # going to top left corner straight
        ((-140, 155), (130, -140)),    # going to top left corner not straight
        ((-150, -150), (150, 150)),    # going to bottom left corner straight
        ((-140, -155), (130, 140)),    # going to bottom left corner not straight
    ]

    target_plot = plots[0]
    add_arrows_on_plot(target_plot, arrows, color="black")
    image_path = "%s.png" % prefix
    target_plot.save_image(image_path, dpi=300)

    with open(image_path, 'rb') as image_file:
        digest = hashlib.md5(image_file.read()).hexdigest()
    assert digest == reference_md5, "%s != %s" % (digest, reference_md5)
def worker(in_queue, out_queue):
    """Process PDB ids from ``in_queue`` until it is empty, then exit.

    For each id the PDB-REDO directory is decompressed, the ``_0cyc`` model
    is read and analysed with ``ramalyze``, and every residue line of the
    old-style output is scored with ``utils.calculate_ramachandran_score``.
    One tuple ``(pdb_id, class_results, score_results)`` is put on
    ``out_queue`` per id:

      * ``class_results`` counts residues per (iris class, MolProbity class)
        pair, pre-seeded with 0 for every pair in ``CLASSIFICATION_PAIRS``;
      * ``score_results`` lists the iris scores per MolProbity class.

    The queue is read with ``get_nowait()``.  The earlier
    ``empty()``-then-``get()`` sequence could block forever when another
    worker took the last item between the two calls.

    The process terminates through ``exit(0)`` once no item is available.
    """
    try:
        from queue import Empty
    except ImportError:  # Python 2 names the module ``Queue``
        from Queue import Empty

    while True:
        try:
            pdb_id = in_queue.get_nowait()
        except Empty:
            exit(0)

        decompress_pdb_redo_dir(pdb_id, suffixes={0})
        pdb_path = os.path.join(
            PDB_REDO_DATA_DIR, pdb_id, pdb_id + '_0cyc.pdb')
        pdb_in = file_reader.any_file(file_name=pdb_path)
        hierarchy = pdb_in.file_object.hierarchy
        rama_analysis = ramalyze.ramalyze(
            pdb_hierarchy=hierarchy, outliers_only=False)
        out = StringIO()
        rama_analysis.show_old_output(out=out, verbose=False)
        output = out.getvalue()

        class_results = {pair: 0 for pair in CLASSIFICATION_PAIRS}
        score_results = {clf: [] for clf in CLASSIFICATIONS}

        for line in output.split('\n'):
            if len(line) == 0:
                continue
            # The residue number is not used below, but parsing it is kept
            # so that a line without an integer in columns 2-5 still raises
            # ValueError here, as before.
            int(line[2:6].strip())
            splitline = [x.strip() for x in line[6:].split(':')]
            code = splitline[0].strip()
            if code not in utils.THREE_LETTER_CODES[0]:
                continue
            if code == 'MSE':
                code = 'MET'
            phi, psi = [float(x) for x in splitline[2:4]]
            # Convert degrees to radians for the scoring function.
            phi *= pi / 180
            psi *= pi / 180
            iris_score = utils.calculate_ramachandran_score(
                None, code, phi, psi)
            if iris_score < THRESHOLDS[1]:
                iris_class = CLASSIFICATIONS[0]
            elif iris_score < THRESHOLDS[0]:
                iris_class = CLASSIFICATIONS[1]
            else:
                iris_class = CLASSIFICATIONS[2]
            # Second-to-last field, normalised to "Xxxxx" capitalisation.
            mp_field = splitline[-2]
            mp_class = mp_field[0].upper() + mp_field[1:].lower()
            class_results[(iris_class, mp_class)] += 1
            score_results[mp_class].append(iris_score)

        cleanup_pdb_redo_dir(pdb_id)
        out_queue.put((pdb_id, class_results, score_results))
def run(self):
    """Run ``ramalyze`` on every model of the data manager and report.

    Each model's atom i_seqs are reset before analysis.  When more than one
    model is present, ``self.params.verbose`` is switched off and a
    "model : <name>" line is printed per model.  The per-model results are
    combined with ``+=``; depending on the parameters the combined result is
    then printed, written as plots, and/or shown in wx windows.

    Raises:
      Sorry: if ``wxplot`` is requested but ``wxtbx.app`` cannot be imported.
    """
    model_names = self.data_manager.get_model_names()
    several_models = len(model_names) > 1

    per_model = []
    for model_name in model_names:
        hierarchy = self.data_manager.get_model(model_name).get_hierarchy()
        hierarchy.atoms().reset_i_seq()
        per_model.append(
            ramalyze(
                pdb_hierarchy=hierarchy,
                show_errors=None,
                outliers_only=self.params.outliers_only,
                out=self.logger,
                quiet=False))
        if several_models:
            self.params.verbose = False
            print('\nmodel : %s' % model_name, file=self.logger)

    # combine models
    combined = per_model[0]
    for extra in per_model[1:]:
        combined += extra

    if self.params.verbose:
        combined.show_old_output(out=self.logger, verbose=True)

    if self.params.plot:
        plot_file_base = self.params.output_prefix
        if plot_file_base is None:
            # Fall back to the first model's file name without extension.
            first_name = os.path.basename(
                self.data_manager.get_model_names()[0])
            plot_file_base = os.path.splitext(first_name)[0]
        combined.write_plots(
            plot_file_base=plot_file_base,
            out=self.logger,
            show_labels=self.params.show_labels,
            point_style=self.params.point_style,
            markerfacecolor=self.params.markerfacecolor,
            show_filling=self.params.show_filling,
            show_contours=self.params.show_contours,
            dpi=self.params.dpi,
            markeredgecolor=self.params.markeredgecolor,
            markersize=self.params.markersize)

    if self.params.wxplot:
        try:
            import wxtbx.app
        except ImportError as e:
            raise Sorry("wxPython not available.")
        else:
            app = wxtbx.app.CCTBXApp(0)
            combined.display_wx_plots()
            app.MainLoop()
def compute(hierarchies, params, log, quiet=False, plot_file_base_default=None):
    """Run ``ramalyze`` on each hierarchy, combine the results and report.

    The per-hierarchy results are merged with ``+=`` starting from the first
    one.  Depending on ``params`` the merged result is printed
    (``verbose``), written as plot files (``plot``; the file base is
    ``params.output_prefix`` or, when that is None,
    ``plot_file_base_default``) and/or displayed with wx (``wxplot``).

    Raises:
      Sorry: if ``wxplot`` is requested but ``wxtbx.app`` cannot be imported.
    """
    per_hierarchy = [
        ramalyze(
            pdb_hierarchy=hierarchy,
            show_errors=None,
            outliers_only=params.outliers_only,
            out=log,
            quiet=quiet)
        for hierarchy in hierarchies
    ]

    # combine models
    merged = per_hierarchy[0]
    for other in per_hierarchy[1:]:
        merged += other

    if params.verbose:
        merged.show_old_output(out=log, verbose=True)

    if params.plot:
        if params.output_prefix is None:
            plot_file_base = plot_file_base_default
        else:
            plot_file_base = params.output_prefix
        merged.write_plots(
            plot_file_base=plot_file_base,
            out=log,
            show_labels=params.show_labels,
            point_style=params.point_style,
            markerfacecolor=params.markerfacecolor,
            show_filling=params.show_filling,
            show_contours=params.show_contours,
            dpi=params.dpi,
            markeredgecolor=params.markeredgecolor,
            markersize=params.markersize)

    if params.wxplot:
        try:
            import wxtbx.app
        except ImportError as e:
            raise Sorry("wxPython not available.")
        else:
            app = wxtbx.app.CCTBXApp(0)
            merged.display_wx_plots()
            app.MainLoop()
def __init__(self, pdb_hierarchy, molprobity_scores=False):
    """Store the hierarchy and, on request, compute validation statistics.

    All statistic attributes start out as None.  When ``molprobity_scores``
    is true they are filled from ``ramalyze``, ``rotalyze``, ``cbetadev``,
    ``clashscore``, ``molprobity_score`` and ``omegalyze``; the analysis
    objects themselves are kept as ``ramalyze_obj``, ``rotalyze_obj``,
    ``cbetadev_obj`` and ``omglz``.
    """
    self.pdb_hierarchy = pdb_hierarchy
    for attribute in (
            "clashscore",
            "ramachandran_outliers",
            "ramachandran_allowed",
            "ramachandran_favored",
            "rotamer_outliers",
            "c_beta_dev",
            "mpscore",
            "omglz",
            "n_cis_proline",
            "n_cis_general",
            "n_twisted_proline",
            "n_twisted_general"):
        setattr(self, attribute, None)
    if not molprobity_scores:
        return

    rama = ramalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False)
    self.ramalyze_obj = rama
    self.ramachandran_outliers = rama.percent_outliers
    self.ramachandran_allowed = rama.percent_allowed
    self.ramachandran_favored = rama.percent_favored

    rota = rotalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False)
    self.rotalyze_obj = rota
    self.rotamer_outliers = rota.percent_outliers

    cbeta = cbetadev(
        pdb_hierarchy=pdb_hierarchy,
        outliers_only=True,
        out=null_out())
    self.cbetadev_obj = cbeta
    self.c_beta_dev = cbeta.get_outlier_count()

    self.clashscore = clashscore(
        pdb_hierarchy=pdb_hierarchy).get_clashscore()
    self.mpscore = molprobity_score(
        clashscore=self.clashscore,
        rota_out=self.rotamer_outliers,
        rama_fav=self.ramachandran_favored)

    omega = omegalyze.omegalyze(
        pdb_hierarchy=self.pdb_hierarchy, quiet=True)
    self.omglz = omega
    self.n_cis_proline = omega.n_cis_proline()
    self.n_cis_general = omega.n_cis_general()
    self.n_twisted_proline = omega.n_twisted_proline()
    self.n_twisted_general = omega.n_twisted_general()
def get_scores(self, model):
    """Compute validation scores for ``model``, print them and return them.

    Returns a ``group_args`` with:
      rama_fav: ``percent_favored`` from ``ramalyze``
      cbeta:    ``get_outlier_percent()`` from ``cbetadev``
      rota:     ``percent_outliers`` from ``rotalyze``
      b_rmsd:   result of ``get_bonds_rmsd`` using
                ``self.geometry_rmsd_manager.geometry`` and the model's
                xray structure
      clash:    ``get_clashscore()`` from ``clashscore`` (hydrogens not kept)

    The same five values are printed on one "DEV:" line.
    """
    rama_fav = ramalyze(pdb_hierarchy=model.get_hierarchy(), outliers_only=False).percent_favored
    cbeta = cbetadev(pdb_hierarchy=model.get_hierarchy(), outliers_only=True, out=null_out()).get_outlier_percent()
    rota = rotalyze(pdb_hierarchy=model.get_hierarchy(), outliers_only=False).percent_outliers
    b_rmsd = get_bonds_rmsd(restraints_manager=self.geometry_rmsd_manager.geometry, xrs=model.get_xray_structure())
    clash = clashscore(pdb_hierarchy=model.get_hierarchy(), keep_hydrogens=False, fast=True, condensed_probe=True).get_clashscore()
    # Python 2 print statement.
    print "DEV: b_rmsd= %7.4f clash= %6.4f rota= %6.4f rama_fav= %5.4f cbeta= %6.4f" % (b_rmsd, clash, rota, rama_fav, cbeta)
    return group_args(rama_fav=rama_fav, cbeta=cbeta, rota=rota, b_rmsd=b_rmsd, clash=clash)
def load_refinement(self, ref):
    """Load one refinement's validation data and build the report layout.

    ``ref`` is a path containing '.dat'; the matching '.pdb', '.ins' and
    '.lst' paths are derived by string replacement.  Clash, Ramachandran,
    rotamer and C-beta analyses are run on the PDB file, statistics are read
    from the LST file, and a vertical sizer is filled with: a summary grid,
    then one boxed section each for Ramachandran outliers, rotamer outliers,
    C-beta outliers, all-atom contacts, split sites and non positive
    definite sites.  Each list control is bound to the corresponding
    ``self._show_*`` handler.

    NOTE(review): ``self._residues`` is read here but not assigned in this
    method; presumably it is populated elsewhere before this call.
    """
    self._pdb_file = ref.replace('.dat', '.pdb')
    self._ins_file = ref.replace('.dat', '.ins')
    self._lst_file = ref.replace('.dat', '.lst')
    lst = LSTParser(self._lst_file)
    pdb_io = pdb.input(file_name=self._pdb_file)
    self._chain_lookup = PDBTools().get_chains(self._pdb_file)
    r = clashscore()
    self._clash_score, self._clashes = clashscore.analyze_clashes(r,pdb_io)# verbose=True)
    # The clash report stored under the '' key is split into one entry per line.
    self._clashes = self._clashes[''].split('\n')
    rama = ramalyze()
    output, self._rama_data = rama.analyze_pdb(pdb_io=pdb_io, outliers_only=False)
    rota = rotalyze()
    output, self._rotamer_data = rota.analyze_pdb(pdb_io, outliers_only=False)
    r = cbetadev()
    output, summary, self._cb_data = cbetadev.analyze_pdb(r,pdb_io=pdb_io,outliers_only=True)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self._rmsds = lst.get_stats()
    # Summary: three-column grid (label, value, goal/spacer).
    self.stats_sizer = wx.FlexGridSizer(cols=3, rows=0, vgap=5, hgap=5)
    self.stats_sizer.Add(wx.StaticText(self, -1, 'RMSD Bonds'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.3f' % (self._rmsds[0])))
    self.stats_sizer.Add(wx.StaticText(self, -1, ''))
    self.stats_sizer.Add(wx.StaticText(self, -1, 'RMSD Angles'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.3f' % (self._rmsds[1])))
    self.stats_sizer.Add(wx.StaticText(self, -1, ''), 0, wx.EXPAND|wx.BOTTOM, 10)
    self.stats_sizer.Add(wx.StaticText(self, -1, 'B Factor (Protein)'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.2f' % (self._residues['avg']['pro'])))
    self.stats_sizer.Add(wx.StaticText(self, -1, ''), 0, wx.EXPAND|wx.BOTTOM)
    self.stats_sizer.Add(wx.StaticText(self, -1, 'B Factor (Solvent)'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.2f' % (self._residues['avg']['sol'])))
    self.stats_sizer.Add(wx.StaticText(self, -1, ''), 0, wx.EXPAND|wx.BOTTOM)
    self.stats_sizer.Add(wx.StaticText(self, -1, 'B Factor (All)'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.2f' % (self._residues['avg']['all'])))
    self.stats_sizer.Add(wx.StaticText(self, -1, ''), 0, wx.EXPAND|wx.BOTTOM, 10)
    # Fractions returned by the analysis objects are shown as percentages.
    self.stats_sizer.Add(wx.StaticText(self, -1, 'Ramachandran Outliers'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.1f' % (rama.get_outliers_count_and_fraction()[1]*100) + '%'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '(Goal ' + rama.get_outliers_goal()+')'))
    self.stats_sizer.Add(wx.StaticText(self, -1, 'Ramachandran Favoured'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.1f' % (rama.get_favored_count_and_fraction()[1]*100) + '%'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '(Goal ' + rama.get_favored_goal()+')'), 0, wx.EXPAND|wx.BOTTOM, 10)
    self.stats_sizer.Add(wx.StaticText(self, -1, 'Rotamer Outliers'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%.1f' % (rota.get_outliers_count_and_fraction()[1]*100) + '%'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '(Goal ' + rota.get_outliers_goal()+')'))
    self.stats_sizer.Add(wx.StaticText(self, -1, 'C-beta Outliers'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%d' % len(self._cb_data)))
    self.stats_sizer.Add(wx.StaticText(self, -1, '(Goal 0)'))
    self.stats_sizer.Add(wx.StaticText(self, -1, 'Clashscore'))
    self.stats_sizer.Add(wx.StaticText(self, -1, '%d' % self._clash_score['']))
    self.sizer.Add(self.stats_sizer, 0, wx.ALL, 10)
    # Ramachandran Outliers
    self.rama_sizer = wx.StaticBoxSizer(wx.StaticBox(self, -1, 'Ramachandran Outliers'),wx.VERTICAL)
    if rama.get_outliers_count_and_fraction()[1] > 0:
        rama_list = wx.ListCtrl(self, -1, style=wx.LC_REPORT)
        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self._show_rama, rama_list)
        sizes = [50, 50, 150, 80, 80, 80]
        for i, item in enumerate(['Chain', 'No', 'Residue', 'Score', 'Phi', 'Psi']):
            rama_list.InsertColumn(i, item, width = sizes[i])
        # i is reused as the row counter: it only advances for outliers.
        i = 0
        self._rama_outliers = []
        for r in self._rama_data:
            (chain_id,resseq,resname,quality,phi,psi,status,pos_name,xyz) = r
            if status == 'OUTLIER':
                self._rama_outliers.append(r)
                rama_list.InsertStringItem(i, str(chain_id))
                rama_list.SetStringItem(i, 1, str(resseq))
                rama_list.SetStringItem(i, 2, resname)
                rama_list.SetStringItem(i, 3, '%.2f' % quality)
                rama_list.SetStringItem(i, 4, '%.1f' % phi)
                rama_list.SetStringItem(i, 5, '%.1f' % psi)
                i += 1
        self.rama_sizer.Add(wx.StaticText(self, -1, '%d Ramachandran outliers found' % i), 0)
        self.rama_sizer.Add(rama_list, 0, wx.EXPAND|wx.ALL, 10)
        self.rama_list = rama_list
    else:
        self.rama_sizer.Add(wx.StaticText(self, -1, 'No Ramachandran Outliers'), 0)
    # The plot button uses the fixed window id 0, which the Bind call matches.
    self.rama_sizer.Add(wx.Button(self, 0, 'Show Ramachandran Plot'), 0)
    self.Bind(wx.EVT_BUTTON, self.show_ramachandran, id=0)
    self.sizer.Add(self.rama_sizer, 0, wx.EXPAND|wx.ALL, 10)
    # Rotamer Outliers
    self.rota_sizer = wx.StaticBoxSizer(wx.StaticBox(self, -1, 'Rotamer Outliers'),wx.VERTICAL)
    if rota.get_outliers_count_and_fraction()[1] > 0:
        rota_list = wx.ListCtrl(self, -1, style=wx.LC_REPORT)
        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self._show_rota, rota_list)
        sizes = [50, 50, 150, 80, 80, 80, 80, 80]
        for i, item in enumerate(['Chain', 'No', 'Residue', 'Score', 'Chi1', 'Chi2', 'Chi3', 'Chi4']):
            rota_list.InsertColumn(i, item, width = sizes[i])
        i = 0
        self._rota_outliers = []
        for r in self._rotamer_data:
            (chain_id,resseq,resname,quality,chi1,chi2,chi3,chi4,status,xyz) = r
            if status == 'OUTLIER':
                self._rota_outliers.append(r)
                rota_list.InsertStringItem(i, str(chain_id))
                rota_list.SetStringItem(i, 1, str(resseq))
                rota_list.SetStringItem(i, 2, resname)
                rota_list.SetStringItem(i, 3, '%.2f' % quality)
                rota_list.SetStringItem(i, 4, '%.1f' % chi1)
                # chi2-chi4 may be None and are then shown as the text 'None'.
                rota_list.SetStringItem(i, 5, '%.1f' % chi2 if chi2 is not None else 'None')
                rota_list.SetStringItem(i, 6, '%.1f' % chi3 if chi3 is not None else 'None')
                rota_list.SetStringItem(i, 7, '%.1f' % chi4 if chi4 is not None else 'None')
                i += 1
        self.rota_sizer.Add(wx.StaticText(self, -1, '%d rotamer outliers found' % i), 0)
        self.rota_sizer.Add(rota_list, 1, wx.EXPAND|wx.ALL, 5)
        self.rota_list = rota_list
    else:
        self.rota_sizer.Add(wx.StaticText(self, -1, 'No Rotamer Outliers'))
    # The plot button uses the fixed window id 1, which the Bind call matches.
    self.rota_sizer.Add(wx.Button(self, 1, 'Show Chi1-Chi2 Plots'))
    self.Bind(wx.EVT_BUTTON, self.show_rotamer, id=1)
    self.sizer.Add(self.rota_sizer, 0, wx.EXPAND|wx.ALL, 10)
    # C-beta Outliers
    self.cb_sizer = wx.StaticBoxSizer(wx.StaticBox(self, -1, 'C-beta Outliers'),wx.VERTICAL)
    if len(self._cb_data) > 0:
        cb_list = wx.ListCtrl(self, -1, style=wx.LC_REPORT)
        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self._show_cb, cb_list)
        sizes = [50, 50, 150, 100, 100]
        for i, item in enumerate(['Chain', 'No', 'Residue', 'Deviation', 'Angle']):
            cb_list.InsertColumn(i, item, width = sizes[i])
        for i,r in enumerate(self._cb_data):
            (pdbf, alt, resname, chain_id, resseq, resseq2, dev, diheral, occ, altchar, xyz) = r
            cb_list.InsertStringItem(i, str(chain_id))
            cb_list.SetStringItem(i, 1, str(resseq+resseq2))
            cb_list.SetStringItem(i, 2, resname.upper())
            cb_list.SetStringItem(i, 3, '%.3f' % dev)
            cb_list.SetStringItem(i, 4, '%.2f' % diheral)
        self.cb_sizer.Add(wx.StaticText(self, -1, '%d C-beta outliers found' % len(self._cb_data)), 0)
        self.cb_sizer.Add(cb_list, 1, wx.EXPAND|wx.ALL, 5)
        self.cb_list = cb_list
    else:
        self.cb_sizer.Add(wx.StaticText(self, -1, 'No C-beta Outliers'))
    self.sizer.Add(self.cb_sizer, 0, wx.EXPAND|wx.ALL, 10)
    # Bad Clashes
    self.clash_sizer = wx.StaticBoxSizer(wx.StaticBox(self, -1, 'All Atom Contacts'), wx.VERTICAL)
    if len(self._clashes) > 0:
        clash_list = wx.ListCtrl(self, -1, style=wx.LC_REPORT)
        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self._show_clash, clash_list)
        sizes = [50, 50, 80, 80, 50, 50, 80, 80, 100]
        for i, item in enumerate(['Chain', 'No', 'Residue', 'Atom', 'Chain', 'No', 'Residue', 'Atom', 'Overlap']):
            clash_list.InsertColumn(i, item, width = sizes[i])
        for i,r in enumerate(self._clashes):
            # The bare string below is a no-op statement showing a sample
            # clash line; the fields are cut out by fixed column slices.
            ' 78 ILE CD1 83 LEU HD21 :-0.402'
            clash_list.InsertStringItem(i, r[0:2].strip())
            clash_list.SetStringItem(i, 1, r[2:6].strip())
            clash_list.SetStringItem(i, 2, r[7:10].strip())
            clash_list.SetStringItem(i, 3, r[11:15].strip())
            clash_list.SetStringItem(i, 4, r[16:18].strip())
            clash_list.SetStringItem(i, 5, r[19:22].strip())
            clash_list.SetStringItem(i, 6, r[23:26].strip())
            clash_list.SetStringItem(i, 7, r[28:32].strip())
            clash_list.SetStringItem(i, 8, r[34:39].strip())
        # NOTE(review): the count shown is the last loop index, i.e.
        # len(self._clashes) - 1, not the number of rows inserted. Confirm
        # whether this relies on a trailing empty entry from the split.
        self.clash_sizer.Add(wx.StaticText(self, -1, '%d bad clashes found' % i), 0)
        self.clash_sizer.Add(clash_list, 1, wx.EXPAND|wx.ALL, 5)
        self.clash_list = clash_list
    else:
        self.clash_sizer.Add(wx.StaticText(self, -1, 'No Bad Clashes'))
    self.sizer.Add(self.clash_sizer, 0, wx.EXPAND|wx.ALL, 10)
    self._split_sites, self._npds = lst.get_site_info()
    # Split Sites
    self.split_sizer = wx.StaticBoxSizer(wx.StaticBox(self, -1, 'Split Sites'), wx.VERTICAL)
    if len(self._split_sites) > 0:
        split_list = wx.ListCtrl(self, -1, style=wx.LC_REPORT)
        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self._show_split, split_list)
        sizes = [100,100,100,50,80,100]
        for i, item in enumerate(['U1', 'U2', 'U3', 'No', 'Residue', 'Atom']):
            split_list.InsertColumn(i, item, width=sizes[i])
        # Only sites whose label (s[3]) contains '_' get a row; i counts rows.
        i = 0
        for j,s in enumerate(self._split_sites):
            if '_' in s[3]:
                split_list.InsertStringItem(i, str(s[0]))
                split_list.SetStringItem(i, 1, str(s[1]))
                split_list.SetStringItem(i, 2, str(s[2]))
                # NOTE(review): `id` shadows the builtin of the same name.
                atom,id = s[3].split('_')
                split_list.SetStringItem(i, 3, str(id))
                # str() is the empty string: the 'Residue' column is left blank.
                split_list.SetStringItem(i, 4, str())
                split_list.SetStringItem(i, 5, str(atom))
                i += 1
        self.split_sizer.Add(wx.StaticText(self, -1, '%d possible split sites found' % len(self._split_sites)))
        self.split_sizer.Add(split_list, 0, wx.EXPAND|wx.ALL, 5)
        self.split_list = split_list
    else:
        self.split_sizer.Add(wx.StaticText(self, -1, 'No split sites found'))
    self.sizer.Add(self.split_sizer, 0, wx.EXPAND|wx.ALL, 10)
    # NPDs
    self.npd_sizer = wx.StaticBoxSizer(wx.StaticBox(self, -1, 'Non Positive Definites'), wx.VERTICAL)
    if len(self._npds) > 0:
        npd_list = wx.ListCtrl(self, -1, style=wx.LC_REPORT)
        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self._show_npd, npd_list)
        sizes = [100,100,100,50,80,100]
        for i, item in enumerate(['U1', 'U2', 'U3', 'No', 'Residue', 'Atom']):
            npd_list.InsertColumn(i, item, width=sizes[i])
        for i, s in enumerate(self._npds):
            npd_list.InsertStringItem(i, str(s[0]))
            npd_list.SetStringItem(i, 1, str(s[1]))
            npd_list.SetStringItem(i, 2, str(s[2]))
            # Unlike the split-site loop there is no '_' check before this
            # split; the unpacking requires exactly one '_' in s[3].
            atom, id = s[3].split('_')
            npd_list.SetStringItem(i, 3, str(id))
            npd_list.SetStringItem(i, 4, str())
            npd_list.SetStringItem(i, 5, str(atom))
        self.npd_sizer.Add(wx.StaticText(self, -1, '%d non positive definite sites found' % len(self._npds)))
        self.npd_sizer.Add(npd_list, 0, wx.EXPAND|wx.ALL, 5)
        self.npd_list = npd_list
    else:
        self.npd_sizer.Add(wx.StaticText(self, -1, 'No non positive definite sites found'))
    self.sizer.Add(self.npd_sizer, 0, wx.EXPAND|wx.ALL, 10)
    self.SetSizer(self.sizer)
    self.SetAutoLayout(1)
    self.SetupScrolling()
# exercise_ramalyze: regression test for ramalyze.ramalyze (Python 2 syntax).
#
# The test returns early, after printing a "Skipping ..." message, when either
# phenix_regression/pdb/jcm.pdb cannot be found through
# libtbx.env.find_in_repositories or find_rotarama_data_dir(optional=True)
# returns None.
#
# Exercise 1 (jcm.pdb): runs ramalyze with outliers_only=True and then
#   outliers_only=False, and asserts on substring counts in the text written by
#   show_old_output(), on len(r.outlier_selection()) == 494, on the
#   get_*_count_and_fraction() accessors, on numtotal == 725, and on selected
#   output lines via show_diff.  The outliers_only=False checks are repeated
#   after a loads(dumps(r)) round trip.  It also checks that the atom-group
#   id_str() values reached through outlier_selection() match the
#   atom_group_id_str() values of r.results.
# Exercise 2 (pdb1jxt.ent): same style of checks; expects no outliers and
#   51 output lines.
# Exercise 3: builds a hierarchy from an inline PDB excerpt containing residue
#   "221A" followed by "221" and asserts that ramalyze yields 3 results.
#
# NOTE(review): in Exercise 1 `pdb_io = pdb.input(...)` and `atom = atoms[i_seq]`
#   are assigned but not read afterwards (pdb_io is only reassigned in
#   Exercise 3).
# NOTE(review): in Exercise 2 the substring counts (Favored 50, General 29,
#   "Isoleucine or valine" 8) differ from the accessor counts asserted a few
#   statements later (43, 25, 5) -- presumably the text output and the
#   accessors count alternate conformations differently; confirm.
# NOTE(review): this text is whitespace-collapsed; the triple-quoted expected
#   strings and the PDB excerpt have lost their original line breaks and
#   column spacing, so the code below is intentionally left untouched.
def exercise_ramalyze(): from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir regression_pdb = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/jcm.pdb", test=os.path.isfile) if (regression_pdb is None): print "Skipping exercise_ramalyze(): input pdb (jcm.pdb) not available" return if (find_rotarama_data_dir(optional=True) is None): print "Skipping exercise_ramalyze(): rotarama_data directory not available" return from iotbx import file_reader # Exercise 1 pdb_in = file_reader.any_file(file_name=regression_pdb) hierarchy = pdb_in.file_object.hierarchy pdb_io = pdb.input(file_name=regression_pdb) hierarchy.atoms().reset_i_seq() r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=True) out = StringIO() r.show_old_output(out=out) output = out.getvalue() assert output.count("OUTLIER") == 100 assert output.count("Favored") == 0 assert output.count("Allowed") == 0 assert output.count("General") == 64 assert output.count("Glycine") == 6 assert output.count("Trans-proline") == 1 assert output.count("Cis-proline") == 0 assert output.count("Pre-proline") == 4 assert output.count("Isoleucine or valine") == 25 assert (len(r.outlier_selection()) == 494) outlier_ids = set([]) atoms = hierarchy.atoms() for i_seq in r.outlier_selection(): atom = atoms[i_seq] atom_group = atoms[i_seq].parent() outlier_ids.add(atom_group.id_str()) outliers1 = sorted([o.atom_group_id_str() for o in r.results]) outliers2 = sorted(list(outlier_ids)) assert (outliers1 == outliers2) r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=False) for unpickle in [False, True]: if unpickle: r = loads(dumps(r)) for outlier in r.results: assert (len(outlier.xyz) == 3) out = StringIO() r.show_old_output(out=out, verbose=False) output = out.getvalue() assert output.count("OUTLIER") == 100 assert output.count("Favored") == 463 assert output.count("Allowed") == 162 assert output.count("General") == 514 assert output.count("Glycine") == 39 assert 
output.count("Trans-proline") == 23 assert output.count("Cis-proline") == 0 assert output.count("Pre-proline") == 21 assert output.count("Isoleucine or valine") == 128 numtotal = r.get_phi_psi_residues_count() assert r.get_outliers_count_and_fraction() == (100, 100. / numtotal) assert r.get_allowed_count_and_fraction() == (162, 162. / numtotal) assert r.get_favored_count_and_fraction() == (463, 463. / numtotal) assert r.get_general_count_and_fraction() == (514, 514. / numtotal) assert r.get_gly_count_and_fraction() == (39, 39. / numtotal) assert r.get_trans_pro_count_and_fraction() == (23, 23. / numtotal) assert r.get_cis_pro_count_and_fraction() == (0, 0. / numtotal) assert r.get_prepro_count_and_fraction() == (21, 21. / numtotal) assert r.get_ileval_count_and_fraction() == (128, 128. / numtotal) #assert numtotal == 75+154+494 #reasons for this math unclear assert numtotal == 725 output_lines = output.splitlines() assert len(output_lines) == 725 selected_lines = [] for x in [ 0, 1, 168, 169, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724 ]: selected_lines.append(output_lines[x]) assert not show_diff( "\n".join(selected_lines), """\ A 15 SER:35.07:-83.26:131.88:Favored:General A 16 SER:0.74:-111.53:71.36:Allowed:General A 191 ASP:2.66:-42.39:121.87:Favored:Pre-proline A 192 PRO:0.31:-39.12:-31.84:Allowed:Trans-proline B 368 LYS:56.44:-62.97:-53.28:Favored:General B 369 GLU:8.89:-44.36:-45.50:Favored:General B 370 LYS:40.00:-50.00:-39.06:Favored:General B 371 VAL:68.24:-60.38:-51.85:Favored:Isoleucine or valine B 372 LEU:0.02:-61.13:-170.23:OUTLIER:General B 373 ARG:0.02:60.09:-80.26:OUTLIER:General B 374 ALA:0.13:-37.21:-36.12:Allowed:General B 375 LEU:11.84:-89.81:-41.45:Favored:General B 376 ASN:84.33:-58.30:-41.39:Favored:General B 377 GLU:30.88:-56.79:-21.74:Favored:General""") assert (len(r.outlier_selection()) == 494) # Exercise 2 regression_pdb = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/pdb1jxt.ent", test=os.path.isfile) pdb_in 
= file_reader.any_file(file_name=regression_pdb) hierarchy = pdb_in.file_object.hierarchy hierarchy.atoms().reset_i_seq() r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=True) out = StringIO() r.show_old_output(out=out) output = out.getvalue() assert output.count("Favored") == 0 assert output.count("Allowed") == 0 assert output.count("OUTLIER") == 0 r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=False) for unpickle in [False, True]: if unpickle: r = loads(dumps(r)) out = StringIO() r.show_old_output(out=out, verbose=False) output = out.getvalue() assert output.count("Favored") == 50 assert output.count("Allowed") == 1 assert output.count("OUTLIER") == 0 assert output.count("General") == 29 assert output.count("Glycine") == 4 assert output.count("Trans-proline") == 5 assert output.count("Cis-proline") == 0 assert output.count("Pre-proline") == 5 assert output.count("Isoleucine or valine") == 8 numtotal = r.get_phi_psi_residues_count() assert r.get_outliers_count_and_fraction() == (0, 0. / numtotal) assert r.get_allowed_count_and_fraction() == (1, 1. / numtotal) assert r.get_favored_count_and_fraction() == (43, 43. / numtotal) #print r.get_general_count_and_fraction() assert r.get_general_count_and_fraction() == (25, 25. / numtotal) assert r.get_gly_count_and_fraction() == (4, 4. / numtotal) assert r.get_trans_pro_count_and_fraction() == (5, 5. / numtotal) assert r.get_cis_pro_count_and_fraction() == (0, 0. / numtotal) assert r.get_prepro_count_and_fraction() == (5, 5. / numtotal) assert r.get_ileval_count_and_fraction() == (5, 5. 
/ numtotal) output_lines = output.splitlines() assert len(output_lines) == 51 selected_lines = [] for x in [0, 1, 5, 6, 7, 8, 9, 47, 48, 49, 50]: selected_lines.append(output_lines[x]) assert not show_diff( "\n".join(selected_lines), """\ A 2 ATHR:33.85:-106.92:144.23:Favored:General A 3 ACYS:47.07:-132.54:137.26:Favored:General A 7 AILE:98.76:-61.91:-44.35:Favored:Isoleucine or valine A 7 BILE:61.50:-56.21:-51.56:Favored:Isoleucine or valine A 8 AVAL:23.11:-50.35:-49.64:Favored:Isoleucine or valine A 8 BVAL:12.01:-83.20:-12.14:Favored:Isoleucine or valine A 8 CVAL:73.11:-61.22:-36.49:Favored:Isoleucine or valine A 43 AASP:51.81:-94.64:5.45:Favored:General A 43 BASP:56.98:-88.69:-0.12:Favored:General A 44 TYR:1.76:-133.10:58.75:Allowed:General A 45 ALA:57.37:-86.61:-8.57:Favored:General""") # Exercise 3: 2plx excerpt (unusual icode usage) import iotbx.pdb.hierarchy pdb_io = iotbx.pdb.hierarchy.input(pdb_string="""\ ATOM 1468 N GLY A 219 3.721 21.322 10.752 1.00 14.12 N ATOM 1469 CA GLY A 219 3.586 21.486 12.188 1.00 14.85 C ATOM 1470 C GLY A 219 4.462 20.538 12.995 1.00 15.63 C ATOM 1471 O GLY A 219 5.513 20.090 12.512 1.00 14.55 O ATOM 1472 N CYS A 220 4.036 20.213 14.235 1.00 15.02 N ATOM 1473 CA CYS A 220 4.776 19.228 15.068 1.00 15.56 C ATOM 1474 C CYS A 220 3.773 18.322 15.741 1.00 14.69 C ATOM 1475 O CYS A 220 2.799 18.828 16.338 1.00 15.54 O ATOM 1476 CB CYS A 220 5.620 19.906 16.174 1.00 15.72 C ATOM 1477 SG CYS A 220 6.762 21.133 15.448 1.00 15.45 S ATOM 1478 N ALA A 221A 4.054 17.017 15.707 1.00 14.77 N ATOM 1479 CA ALA A 221A 3.274 16.015 16.507 1.00 14.01 C ATOM 1480 C ALA A 221A 1.774 15.992 16.099 1.00 14.50 C ATOM 1481 O ALA A 221A 0.875 15.575 16.881 1.00 14.46 O ATOM 1482 CB ALA A 221A 3.440 16.318 17.935 1.00 12.28 C ATOM 1483 N GLN A 221 1.523 16.390 14.848 1.00 14.52 N ATOM 1484 CA GLN A 221 0.159 16.391 14.325 1.00 15.19 C ATOM 1485 C GLN A 221 -0.229 15.044 13.717 1.00 14.43 C ATOM 1486 O GLN A 221 0.641 14.280 13.307 1.00 16.88 O ATOM 1487 CB 
GLN A 221 0.002 17.491 13.272 1.00 16.41 C ATOM 1488 CG GLN A 221 0.253 18.906 13.805 1.00 16.52 C ATOM 1489 CD GLN A 221 -0.640 19.181 14.995 1.00 17.87 C ATOM 1490 OE1 GLN A 221 -1.857 19.399 14.826 1.00 13.54 O ATOM 1491 NE2 GLN A 221 -0.050 19.149 16.228 1.00 16.18 N ATOM 1492 N LYS A 222 -1.537 14.773 13.694 1.00 14.34 N ATOM 1493 CA LYS A 222 -2.053 13.536 13.125 1.00 15.07 C ATOM 1494 C LYS A 222 -1.679 13.455 11.655 1.00 14.88 C ATOM 1495 O LYS A 222 -1.856 14.424 10.883 1.00 14.32 O """) r = ramalyze.ramalyze(pdb_hierarchy=pdb_io.hierarchy, outliers_only=False) assert (len(r.results) == 3)
def __init__(self, pdb_hierarchy, params=None,
             secondary_structure_annotation=None, reference_map=None,
             crystal_symmetry=None, grm=None, rama_manager=None,
             rotamer_manager=None, log=null_out(), verbose=False,
             tried_rama_angles=None, tried_final_rama_angles=None, n_run=0):
    """Idealize Ramachandran outliers chain by chain, then re-evaluate.

    A deep copy of ``pdb_hierarchy`` is stored in ``self.resulting_pdb_h``.
    While ``params.enabled`` is true, fewer than
    ``params.number_of_ccd_trials`` trials have run, and either the outlier
    percentage is above 0.001 or (``params.make_all_trans`` and at least one
    bad omega remains), each trial:

    1. calls ``self.idealize_chain`` on the first chain of every chain id
       and copies the resulting coordinates back with ``set_xyz_smart``;
    2. round-trips the hierarchy through its PDB string (coordinate
       rounding) and recounts outliers;
    3. if ``params.minimize_whole``, fixes rotamers and runs either
       ``minimize_wrapper_for_ramachandran`` (no map) or
       ``minimize_wrapper_with_map``;
    4. recounts outliers and bad omegas.

    Parameters:
      pdb_hierarchy: single-model hierarchy; it is copied, not modified.
      params: passed through ``self.process_params``.
      secondary_structure_annotation: forwarded to the minimizers.
      reference_map, crystal_symmetry: must be given together; if exactly
        one is ``None`` a message is logged and the map is not used.
      grm: restraints manager; ``grm.geometry`` is read when
        ``params.minimize_whole`` is true.
      rama_manager, rotamer_manager: default to ``rama_eval()`` and
        ``RotamerEval()`` when ``None``.
      log: file-like object receiving all diagnostics.
      verbose: stored on ``self.verbose``.
      tried_rama_angles, tried_final_rama_angles: nested dicts
        (chain id -> dict) recording already-tried CCD solutions.  A dict
        passed in is updated in place; ``None`` (default) creates a fresh
        dict for this instance.
      n_run: stored on ``self.n_run``.

    Raises:
      Sorry: if ``pdb_hierarchy`` contains more than one model.

    NOTE(review): statement nesting was reconstructed from a
    whitespace-collapsed copy of this method -- confirm against the
    canonical source.
    """
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation = secondary_structure_annotation
    asc = pdb_hierarchy.atom_selection_cache()
    self.xrs = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)
    self.reference_map = reference_map
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.grm = grm
    self.r = rama_manager
    self.ideal_res_dict = idealized_aa.residue_dict()
    self.n_run = n_run
    if self.r is None:
        self.r = rama_eval()
    self.rotamer_manager = rotamer_manager
    if self.rotamer_manager is None:
        self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    n_inputs = [reference_map, crystal_symmetry].count(None)
    if not (n_inputs == 0 or n_inputs == 2):
        print >> log, "Need to have both map and symmetry info. Not using map."
        self.reference_map = None

    # Here we record which CCD solutions were used to fix particular
    # outliers, so that the same ones are not used in the next CCD try.
    # Nested dict. First level:
    #   key: chain id, value: dict
    #     key: resid (string), value: list of tried variants.
    # A fresh dict is created per instance when the caller passes nothing;
    # a `{}` default in the signature would be shared by every instance.
    self.tried_rama_angles = (
        {} if tried_rama_angles is None else tried_rama_angles)
    self.tried_final_rama_angles = (
        {} if tried_final_rama_angles is None else tried_final_rama_angles)

    berkeley_count = utils.list_rama_outliers_h(
        self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count / float(
            self.resulting_pdb_h.overall_counts().n_residues) * 100
    n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)
    self.berkeley_p_after_minimiaztion_rama_outliers = \
        self.berkeley_p_before_minimization_rama_outliers
    self.ref_exclusion_selection = ""
    self.number_of_ccd_trials = 0
    if (self.berkeley_p_before_minimization_rama_outliers <= 0.001
            and (n_bad_omegas < 1 and self.params.make_all_trans)):
        print >> self.log, "No ramachandran outliers, skipping CCD step."
    # Diagnostics go to the log like every other message of this method.
    print >> self.log, "n_bad_omegas", n_bad_omegas
    print >> self.log, "self.params.make_all_trans", self.params.make_all_trans
    if not self.params.enabled:
        print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."

    # Per-chain exclusion groups, accumulated over all trials and joined
    # with " or " after each trial.  Appending to the already-trimmed
    # string would leave consecutive trials' groups without an operator
    # between them ("(a) (b) or ...").
    exclusion_groups = []
    while (self.number_of_ccd_trials < self.params.number_of_ccd_trials
           and (self.berkeley_p_after_minimiaztion_rama_outliers > 0.001
                or (n_bad_omegas >= 1 and self.params.make_all_trans))
           and self.params.enabled):
        print >> self.log, "CCD try number, outliers:", self.number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
        processed_chain_ids = []
        for chain in self.resulting_pdb_h.only_model().chains():
            self.tried_rama_angles.setdefault(chain.id, {})
            self.tried_final_rama_angles.setdefault(chain.id, {})
            print >> self.log, "Idealizing chain %s" % chain.id
            if chain.id in processed_chain_ids:
                # Only the first chain with a given id is processed.
                continue
            processed_chain_ids.append(chain.id)
            selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
            sel = asc.selection("chain %s" % chain.id)
            chain_h = self.resulting_pdb_h.select(sel)
            m = chain_h.only_model()
            cutted_chain_h = None
            for i, c in enumerate(m.chains()):
                if i == 0:
                    cutted_chain_h = \
                        iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
                else:
                    print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                    print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
                    m.remove_chain(c)
            exclusions, ch_h = self.idealize_chain(
                hierarchy=(cutted_chain_h if cutted_chain_h else chain_h),
                tried_rama_angles_for_chain=self.tried_rama_angles[chain.id],
                tried_final_rama_angles_for_chain=
                    self.tried_final_rama_angles[chain.id])
            if ch_h is not None:
                set_xyz_smart(dest_h=chain, source_h=ch_h)
                for resnum in exclusions:
                    selection += " and not resseq %s" % resnum
            exclusion_groups.append("(%s)" % selection)
        print >> self.log, "self.tried_rama_angles", self.tried_rama_angles
        print >> self.log, "self.tried_final_rama_angles", self.tried_final_rama_angles
        self.ref_exclusion_selection = " or ".join(exclusion_groups)

        # Dump and reload the hierarchy to get proper rounding of
        # coordinates.
        self.resulting_pdb_h = iotbx.pdb.input(
            source_info=None,
            lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        self.berkeley_p_before_minimization_rama_outliers = \
            berkeley_count / float(
                self.resulting_pdb_h.overall_counts().n_residues) * 100
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_before_minimization_rama_outliers = ram.out_percent
        duke_count = ram.get_outliers_count_and_fraction()[0]
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count
            self.resulting_pdb_h.write_pdb_file(
                file_name="%d%s_discrepancy.pdb" % (
                    self.number_of_ccd_trials, self.params.output_prefix))
        if self.params.debug:
            self.resulting_pdb_h.write_pdb_file(
                file_name="%d%s_all_not_minized.pdb" % (
                    self.number_of_ccd_trials, self.params.output_prefix))

        if self.params.minimize_whole:
            print >> self.log, "minimizing whole chain..."
            print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
            # XXX but first let's check and fix rotamers...
            print >> self.log, "Fixing/checking rotamers in loop idealization..."
            # NOTE(review): with an empty exclusion string this selects
            # "()" -- confirm the selection cache accepts that.
            non_outliers_for_check = asc.selection(
                "(%s)" % self.ref_exclusion_selection)
            mmtbx.utils.fix_rotamer_outliers(
                pdb_hierarchy=self.resulting_pdb_h,
                grm=self.grm.geometry,
                xrs=self.xrs,
                map_data=self.reference_map,
                radius=5,
                mon_lib_srv=None,
                rotamer_manager=self.rotamer_manager,
                backrub_range=None,  # don't sample backrub at this point
                non_outliers_to_check=non_outliers_for_check,  # bool selection
                asc=asc,
                verbose=True,
                log=self.log)
            if self.reference_map is None:
                minimize_wrapper_for_ramachandran(
                    hierarchy=self.resulting_pdb_h,
                    xrs=self.xrs,
                    original_pdb_h=self.original_pdb_h,
                    excl_string_selection=self.ref_exclusion_selection,
                    grm=self.grm,
                    log=None,
                    ss_annotation=self.secondary_structure_annotation)
            else:
                minimize_wrapper_with_map(
                    pdb_h=self.resulting_pdb_h,
                    xrs=self.xrs,
                    target_map=self.reference_map,
                    grm=self.grm,
                    ss_annotation=self.secondary_structure_annotation,
                    number_of_cycles=Auto,
                    log=self.log)
        if self.params.debug:
            self.resulting_pdb_h.write_pdb_file(
                file_name="%d%s_all_minized.pdb" % (
                    self.number_of_ccd_trials, self.params.output_prefix))

        # Re-evaluate; these values drive the loop condition above.
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_after_minimiaztion_rama_outliers = ram.out_percent
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        duke_count = ram.get_outliers_count_and_fraction()[0]
        n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)
        self.berkeley_p_after_minimiaztion_rama_outliers = \
            berkeley_count / float(
                self.resulting_pdb_h.overall_counts().n_residues) * 100
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
        else:
            print >> self.log, "Number of Rama outliers after min:", berkeley_count
        print >> self.log, "Number of bad omegas:", n_bad_omegas
        self.number_of_ccd_trials += 1
def __init__( self, pdb_hierarchy, molprobity_scores=False, ): """ This class is being pickled. Try not to introduce huge members, e.g. self.hierarchy, etc. This is the reason ramalyze_obj, rotalyze_obj etc are not members of the class (not self.ramalyze_obj). """ self.clashscore = None self.ramachandran_outliers = None self.ramachandran_allowed = None self.ramachandran_favored = None self.rotamer_outliers = None self.c_beta_dev = None self.mpscore = None self.omglz = None self.n_cis_proline = None self.n_cis_general = None self.n_twisted_proline = None self.n_twisted_general = None if(molprobity_scores): ramalyze_obj = ramalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False) self.ramachandran_outliers = ramalyze_obj.percent_outliers self.ramachandran_outliers_cf = ramalyze_obj.get_outliers_count_and_fraction() self.ramachandran_allowed = ramalyze_obj.percent_allowed self.ramachandran_allowed_cf = ramalyze_obj.get_allowed_count_and_fraction() self.ramachandran_favored = ramalyze_obj.percent_favored self.ramachandran_favored_cf = ramalyze_obj.get_favored_count_and_fraction() rotalyze_obj = rotalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False) self.rotamer_outliers = rotalyze_obj.percent_outliers self.rotamer_cf = rotalyze_obj.get_outliers_count_and_fraction() cbetadev_obj = cbetadev( pdb_hierarchy = pdb_hierarchy, outliers_only = True, out = null_out()) self.c_beta_dev = cbetadev_obj.get_outlier_count() self.c_beta_dev_percent = cbetadev_obj.get_weighted_outlier_percent() self.clashscore = clashscore(pdb_hierarchy=pdb_hierarchy).get_clashscore() self.mpscore = molprobity_score( clashscore = self.clashscore, rota_out = self.rotamer_outliers, rama_fav = self.ramachandran_favored) omglz = omegalyze.omegalyze( pdb_hierarchy=pdb_hierarchy, quiet=True) self.n_proline = omglz.n_proline() self.n_general = omglz.n_general() self.n_cis_proline = omglz.n_cis_proline() self.n_cis_general = omglz.n_cis_general() self.n_twisted_proline = omglz.n_twisted_proline() 
self.n_twisted_general = omglz.n_twisted_general() self.cis_general = 0 self.twisted_general = 0 self.cis_proline = 0 self.twisted_proline = 0 if self.n_proline != 0: self.cis_proline = self.n_cis_proline*100./self.n_proline self.twisted_proline = self.n_twisted_proline*100./self.n_proline if self.n_general != 0: self.cis_general = self.n_cis_general*100./self.n_general self.twisted_general = self.n_twisted_general*100./self.n_general self.cablam_outliers=None self.cablam_disfavored=None self.cablam_ca_outliers=None try: cablam_results = cablam.cablamalyze(pdb_hierarchy, outliers_only=False, out=null_out(), quiet=True) self.cablam_outliers = cablam_results.percent_outliers() self.cablam_disfavored = cablam_results.percent_disfavored() self.cablam_ca_outliers = cablam_results.percent_ca_outliers() except Exception as e: print "CaBLAM failed with exception:" print " %s" % str(e) pass
def __init__(self, pdb_hierarchy, params=None,
             secondary_structure_annotation=None, log=null_out(),
             verbose=True):
    """Idealize Ramachandran outliers chain by chain, then minimize.

    A deep copy of ``pdb_hierarchy`` is stored in ``self.resulting_pdb_h``.
    While ``params.enabled`` is true, fewer than
    ``params.number_of_ccd_trials`` trials have run, and the outlier
    percentage measured after the CCD step is above 0.001, each trial:

    1. calls ``self.idealize_chain`` on the first chain of every chain id
       and copies the resulting coordinates back with ``set_xyz_smart``;
    2. round-trips the hierarchy through its PDB string (coordinate
       rounding), recounts outliers and writes
       ``<output_prefix>_before_minimization.pdb``;
    3. if ``params.minimize_whole``, runs
       ``minimize_wrapper_for_ramachandran`` and recounts outliers.

    Parameters:
      pdb_hierarchy: single-model hierarchy; it is copied, not modified.
      params: passed through ``self.process_params``.
      secondary_structure_annotation: forwarded to the minimizer.
      log: file-like object receiving all diagnostics.
      verbose: stored on ``self.verbose``.

    Raises:
      Sorry: if ``pdb_hierarchy`` contains more than one model.

    NOTE(review): statement nesting was reconstructed from a
    whitespace-collapsed copy of this method; in particular confirm that
    the post-minimization statistics belong inside the
    ``params.minimize_whole`` branch.
    """
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation = secondary_structure_annotation
    xrs = pdb_hierarchy.extract_xray_structure()
    asc = pdb_hierarchy.atom_selection_cache()
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.r = rama_eval()
    self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    berkeley_count = utils.list_rama_outliers_h(
        self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count / float(
            self.resulting_pdb_h.overall_counts().n_residues) * 100
    self.berkeley_p_after_minimiaztion_rama_outliers = None
    self.ref_exclusion_selection = ""
    number_of_ccd_trials = 0
    if self.berkeley_p_before_minimization_rama_outliers <= 0.001:
        print >> self.log, "No ramachandran outliers, skipping CCD step."
    if not self.params.enabled:
        print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."

    # Per-chain exclusion groups, accumulated over all trials and joined
    # with " or " after each trial.  Appending to the already-trimmed
    # string would leave consecutive trials' groups without an operator
    # between them ("(a) (b) or ...").
    exclusion_groups = []
    while (number_of_ccd_trials < self.params.number_of_ccd_trials
           and self.berkeley_p_before_minimization_rama_outliers > 0.001
           and self.params.enabled):
        # Sent to the log like every other message of this method.
        print >> self.log, "CCD try number, outliers:", number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
        number_of_ccd_trials += 1
        processed_chain_ids = []
        for chain in self.resulting_pdb_h.only_model().chains():
            print >> self.log, "Idealizing chain %s" % chain.id
            if chain.id in processed_chain_ids:
                # Only the first chain with a given id is processed.
                continue
            processed_chain_ids.append(chain.id)
            selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
            sel = asc.selection("chain %s" % chain.id)
            chain_h = self.resulting_pdb_h.select(sel)
            m = chain_h.only_model()
            cutted_chain_h = None
            for i, c in enumerate(m.chains()):
                if i == 0:
                    cutted_chain_h = \
                        iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
                else:
                    print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                    print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
                    m.remove_chain(c)
            exclusions, ch_h = self.idealize_chain(
                hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
            if ch_h is not None:
                set_xyz_smart(dest_h=chain, source_h=ch_h)
                for resnum in exclusions:
                    selection += " and not resseq %s" % resnum
            exclusion_groups.append("(%s)" % selection)
        self.ref_exclusion_selection = " or ".join(exclusion_groups)

        # Dump and reload the hierarchy to get proper rounding of
        # coordinates.
        self.resulting_pdb_h = iotbx.pdb.input(
            source_info=None,
            lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        self.berkeley_p_before_minimization_rama_outliers = \
            berkeley_count / float(
                self.resulting_pdb_h.overall_counts().n_residues) * 100
        self.resulting_pdb_h.write_pdb_file(
            file_name="%s_before_minimization.pdb" % self.params.output_prefix)
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_before_minimization_rama_outliers = ram.out_percent
        duke_count = ram.get_outliers_count_and_fraction()[0]
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count

        if self.params.minimize_whole:
            print >> self.log, "minimizing whole thing..."
            print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
            minimize_wrapper_for_ramachandran(
                hierarchy=self.resulting_pdb_h,
                xrs=xrs,
                original_pdb_h=self.original_pdb_h,
                excl_string_selection=self.ref_exclusion_selection,
                log=None,
                ss_annotation=self.secondary_structure_annotation)
            ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
            self.p_after_minimiaztion_rama_outliers = ram.out_percent
            berkeley_count = utils.list_rama_outliers_h(
                self.resulting_pdb_h).count("\n")
            duke_count = ram.get_outliers_count_and_fraction()[0]
            self.berkeley_p_after_minimiaztion_rama_outliers = \
                berkeley_count / float(
                    self.resulting_pdb_h.overall_counts().n_residues) * 100
            if berkeley_count != duke_count:
                print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
            else:
                print >> self.log, "Number of Rama outliers after min:", berkeley_count
# exercise_ramalyze: regression test for ramalyze.ramalyze (Python 2 syntax).
#
# The test returns early, after printing a "Skipping ..." message, when either
# phenix_regression/pdb/jcm.pdb cannot be found through
# libtbx.env.find_in_repositories or find_rotarama_data_dir(optional=True)
# returns None.
#
# Exercise 1 (jcm.pdb): runs ramalyze with outliers_only=True and then
#   outliers_only=False, and asserts on substring counts in the text written by
#   show_old_output(), on len(r.outlier_selection()) == 788, on the
#   get_*_count_and_fraction() accessors, on numtotal == 75+154+494 (723, the
#   same as the asserted number of output lines), and on selected output lines
#   via show_diff.  The outliers_only=False checks are repeated after a
#   loads(dumps(r)) round trip.  It also checks that the atom-group id_str()
#   values reached through outlier_selection() match the atom_group_id_str()
#   values of r.results.
# Exercise 2 (pdb1jxt.ent): same style of checks; expects no outliers and
#   48 output lines.
# Exercise 3: builds a hierarchy from an inline PDB excerpt containing residue
#   "221A" followed by "221" and asserts that ramalyze yields 3 results.
#
# NOTE(review): in Exercise 1 `pdb_io = pdb.input(...)` and `atom = atoms[i_seq]`
#   are assigned but not read afterwards (pdb_io is only reassigned in
#   Exercise 3).
# NOTE(review): this text is whitespace-collapsed; the triple-quoted expected
#   strings and the PDB excerpt have lost their original line breaks and
#   column spacing, so the code below is intentionally left untouched.
def exercise_ramalyze(): from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir regression_pdb = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/jcm.pdb", test=os.path.isfile) if (regression_pdb is None): print "Skipping exercise_ramalyze(): input pdb (jcm.pdb) not available" return if (find_rotarama_data_dir(optional=True) is None): print "Skipping exercise_ramalyze(): rotarama_data directory not available" return from iotbx import file_reader # Exercise 1 pdb_in = file_reader.any_file(file_name=regression_pdb) hierarchy = pdb_in.file_object.hierarchy pdb_io = pdb.input(file_name=regression_pdb) hierarchy.atoms().reset_i_seq() r = ramalyze.ramalyze( pdb_hierarchy=hierarchy, outliers_only=True) out = StringIO() r.show_old_output(out=out) output = out.getvalue() assert output.count("OUTLIER") == 100 assert output.count("Favored") == 0 assert output.count("Allowed") == 0 assert output.count("General") == 64 assert output.count("Glycine") == 6 assert output.count("Trans-proline") == 1 assert output.count("Cis-proline") == 0 assert output.count("Pre-proline") == 4 assert output.count("Isoleucine or valine") == 25 assert (len(r.outlier_selection()) == 788) outlier_ids = set([]) atoms = hierarchy.atoms() for i_seq in r.outlier_selection() : atom = atoms[i_seq] atom_group = atoms[i_seq].parent() outlier_ids.add(atom_group.id_str()) outliers1 = sorted([ o.atom_group_id_str() for o in r.results ]) outliers2 = sorted(list(outlier_ids)) assert (outliers1 == outliers2) r = ramalyze.ramalyze( pdb_hierarchy=hierarchy, outliers_only=False) for unpickle in [False, True] : if unpickle : r = loads(dumps(r)) for outlier in r.results : assert (len(outlier.xyz) == 3) out = StringIO() r.show_old_output(out=out, verbose=False) output = out.getvalue() assert output.count("OUTLIER") == 100 assert output.count("Favored") == 461 assert output.count("Allowed") == 162 assert output.count("General") == 513 assert output.count("Glycine") == 39 assert 
output.count("Trans-proline") == 23 assert output.count("Cis-proline") == 0 assert output.count("Pre-proline") == 21 assert output.count("Isoleucine or valine") == 127 numtotal = r.get_phi_psi_residues_count() assert r.get_outliers_count_and_fraction() == (100, 100./numtotal) assert r.get_allowed_count_and_fraction() == (162, 162./numtotal) assert r.get_favored_count_and_fraction() == (461, 461./numtotal) assert r.get_general_count_and_fraction() == (513, 513./numtotal) assert r.get_gly_count_and_fraction() == (39, 39./numtotal) assert r.get_trans_pro_count_and_fraction() == (23, 23./numtotal) assert r.get_cis_pro_count_and_fraction() == (0, 0./numtotal) assert r.get_prepro_count_and_fraction() == (21, 21./numtotal) assert r.get_ileval_count_and_fraction() == (127, 127./numtotal) assert numtotal == 75+154+494 output_lines = output.splitlines() assert len(output_lines) == 723 selected_lines = [] for x in [0, 1, 168, 169, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722]: selected_lines.append(output_lines[x]) assert not show_diff("\n".join(selected_lines), """\ A 15 SER:35.07:-83.26:131.88:Favored:General A 16 SER:0.74:-111.53:71.36:Allowed:General A 191 ASP:2.66:-42.39:121.87:Favored:Pre-proline A 192 PRO:0.31:-39.12:-31.84:Allowed:Trans-proline B 368 LYS:56.44:-62.97:-53.28:Favored:General B 369 GLU:8.89:-44.36:-45.50:Favored:General B 370 LYS:40.00:-50.00:-39.06:Favored:General B 371 VAL:68.24:-60.38:-51.85:Favored:Isoleucine or valine B 372 LEU:0.02:-61.13:-170.23:OUTLIER:General B 373 ARG:0.02:60.09:-80.26:OUTLIER:General B 374 ALA:0.13:-37.21:-36.12:Allowed:General B 375 LEU:11.84:-89.81:-41.45:Favored:General B 376 ASN:84.33:-58.30:-41.39:Favored:General B 377 GLU:30.88:-56.79:-21.74:Favored:General""") assert (len(r.outlier_selection()) == 788) # Exercise 2 regression_pdb = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/pdb1jxt.ent", test=os.path.isfile) pdb_in = file_reader.any_file(file_name=regression_pdb) hierarchy = 
pdb_in.file_object.hierarchy hierarchy.atoms().reset_i_seq() r = ramalyze.ramalyze( pdb_hierarchy=hierarchy, outliers_only=True) out = StringIO() r.show_old_output(out=out) output = out.getvalue() assert output.count("Favored") == 0 assert output.count("Allowed") == 0 assert output.count("OUTLIER") == 0 r = ramalyze.ramalyze( pdb_hierarchy=hierarchy, outliers_only=False) for unpickle in [False, True] : if unpickle : r = loads(dumps(r)) out = StringIO() r.show_old_output(out=out, verbose=False) output = out.getvalue() assert output.count("Favored") == 47 assert output.count("Allowed") == 1 assert output.count("OUTLIER") == 0 assert output.count("General") == 27 assert output.count("Glycine") == 4 assert output.count("Trans-proline") == 4 assert output.count("Cis-proline") == 0 assert output.count("Pre-proline") == 5 assert output.count("Isoleucine or valine") == 8 numtotal = r.get_phi_psi_residues_count() assert r.get_outliers_count_and_fraction() == (0, 0./numtotal) assert r.get_allowed_count_and_fraction() == (1, 1./numtotal) assert r.get_favored_count_and_fraction() == (47, 47./numtotal) assert r.get_general_count_and_fraction() == (27, 27./numtotal) assert r.get_gly_count_and_fraction() == (4, 4./numtotal) assert r.get_trans_pro_count_and_fraction() == (4, 4./numtotal) assert r.get_cis_pro_count_and_fraction() == (0, 0./numtotal) assert r.get_prepro_count_and_fraction() == (5, 5./numtotal) assert r.get_ileval_count_and_fraction() == (8, 8./numtotal) output_lines = output.splitlines() assert len(output_lines) == 48 selected_lines = [] for x in [0, 1, 6, 7, 8, 9, 10, 44, 45, 46, 47]: selected_lines.append(output_lines[x]) assert not show_diff("\n".join(selected_lines), """\ A 2 ATHR:33.85:-106.92:144.23:Favored:General A 2 BTHR:37.07:-97.44:137.00:Favored:General A 7 AILE:98.76:-61.91:-44.35:Favored:Isoleucine or valine A 7 BILE:61.50:-56.21:-51.56:Favored:Isoleucine or valine A 8 AVAL:23.11:-50.35:-49.64:Favored:Isoleucine or valine A 8 
BVAL:12.01:-83.20:-12.14:Favored:Isoleucine or valine A 8 CVAL:73.11:-61.22:-36.49:Favored:Isoleucine or valine A 43 AASP:51.81:-94.64:5.45:Favored:General A 43 BASP:56.98:-88.69:-0.12:Favored:General A 44 TYR:1.76:-133.10:58.75:Allowed:General A 45 ALA:57.37:-86.61:-8.57:Favored:General""") # Exercise 3: 2plx excerpt (unusual icode usage) import iotbx.pdb.hierarchy pdb_io = iotbx.pdb.hierarchy.input(pdb_string="""\ ATOM 1468 N GLY A 219 3.721 21.322 10.752 1.00 14.12 N ATOM 1469 CA GLY A 219 3.586 21.486 12.188 1.00 14.85 C ATOM 1470 C GLY A 219 4.462 20.538 12.995 1.00 15.63 C ATOM 1471 O GLY A 219 5.513 20.090 12.512 1.00 14.55 O ATOM 1472 N CYS A 220 4.036 20.213 14.235 1.00 15.02 N ATOM 1473 CA CYS A 220 4.776 19.228 15.068 1.00 15.56 C ATOM 1474 C CYS A 220 3.773 18.322 15.741 1.00 14.69 C ATOM 1475 O CYS A 220 2.799 18.828 16.338 1.00 15.54 O ATOM 1476 CB CYS A 220 5.620 19.906 16.174 1.00 15.72 C ATOM 1477 SG CYS A 220 6.762 21.133 15.448 1.00 15.45 S ATOM 1478 N ALA A 221A 4.054 17.017 15.707 1.00 14.77 N ATOM 1479 CA ALA A 221A 3.274 16.015 16.507 1.00 14.01 C ATOM 1480 C ALA A 221A 1.774 15.992 16.099 1.00 14.50 C ATOM 1481 O ALA A 221A 0.875 15.575 16.881 1.00 14.46 O ATOM 1482 CB ALA A 221A 3.440 16.318 17.935 1.00 12.28 C ATOM 1483 N GLN A 221 1.523 16.390 14.848 1.00 14.52 N ATOM 1484 CA GLN A 221 0.159 16.391 14.325 1.00 15.19 C ATOM 1485 C GLN A 221 -0.229 15.044 13.717 1.00 14.43 C ATOM 1486 O GLN A 221 0.641 14.280 13.307 1.00 16.88 O ATOM 1487 CB GLN A 221 0.002 17.491 13.272 1.00 16.41 C ATOM 1488 CG GLN A 221 0.253 18.906 13.805 1.00 16.52 C ATOM 1489 CD GLN A 221 -0.640 19.181 14.995 1.00 17.87 C ATOM 1490 OE1 GLN A 221 -1.857 19.399 14.826 1.00 13.54 O ATOM 1491 NE2 GLN A 221 -0.050 19.149 16.228 1.00 16.18 N ATOM 1492 N LYS A 222 -1.537 14.773 13.694 1.00 14.34 N ATOM 1493 CA LYS A 222 -2.053 13.536 13.125 1.00 15.07 C ATOM 1494 C LYS A 222 -1.679 13.455 11.655 1.00 14.88 C ATOM 1495 O LYS A 222 -1.856 14.424 10.883 1.00 14.32 O """) r = 
ramalyze.ramalyze( pdb_hierarchy=pdb_io.hierarchy, outliers_only=False) assert (len(r.results) == 3)
def __init__(self,
             pdb_hierarchy,
             params=None,
             secondary_structure_annotation=None,
             reference_map=None,
             crystal_symmetry=None,
             grm=None,
             rama_manager=None,
             rotamer_manager=None,
             log=null_out(),
             verbose=False):
    """Idealize chains with Ramachandran outliers and optionally minimize.

    The work happens in the constructor: outliers are counted, then up to
    ``params.number_of_ccd_trials`` rounds are run in which every chain is
    passed to ``self.idealize_chain`` and, when ``params.minimize_whole``
    is set, the whole model is minimized (with or without a map).

    Parameters:
      pdb_hierarchy: input hierarchy; must hold a single model. It is kept
        as ``self.original_pdb_h`` and a deep copy is modified and exposed
        as ``self.resulting_pdb_h``.
      params: passed through ``self.process_params``; the attributes read
        here are ``enabled``, ``number_of_ccd_trials`` and
        ``minimize_whole``.
      secondary_structure_annotation: stored and forwarded to the
        minimization wrappers as ``ss_annotation``.
      reference_map, crystal_symmetry: either both given or both None; if
        only one is given the map is ignored.
      grm: forwarded to the minimization wrappers.
      rama_manager: defaults to ``rama_eval()`` when None.
      rotamer_manager: defaults to ``RotamerEval()`` when None.
      log: stream receiving progress messages.
      verbose: stored as ``self.verbose``.

    Raises:
      Sorry: if the hierarchy contains more than one model.

    NOTE(review): this block was recovered from a whitespace-collapsed
    source, so the nesting shown here is inferred from statement order --
    confirm against the upstream file before relying on it.
    """
    # Only single-model input is handled.
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation = secondary_structure_annotation
    # Selection cache is built once, from the *input* hierarchy.
    asc = pdb_hierarchy.atom_selection_cache()
    self.xrs = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)
    self.reference_map = reference_map
    # All modifications go to a private copy with fresh i_seqs.
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.grm = grm
    self.r = rama_manager
    if self.r is None:
        self.r = rama_eval()
    self.rotamer_manager = rotamer_manager
    if self.rotamer_manager is None:
        self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    # Number of missing items among (map, symmetry): 0 = both present,
    # 2 = neither; 1 means the pair is incomplete and the map is dropped.
    n_inputs = [reference_map, crystal_symmetry].count(None)
    if not (n_inputs == 0 or n_inputs == 2):
        print >> log, "Need to have both map and symmetry info. Not using map."
        self.reference_map = None

    # Outlier count = number of newlines in the listing text
    # (presumably one line per outlier -- confirm against utils).
    berkeley_count = utils.list_rama_outliers_h(
        self.resulting_pdb_h).count("\n")
    # Expressed as a percentage of all residues.
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100

    # self.berkeley_p_before_minimization_rama_outliers = None
    # Seed the loop condition below with the pre-idealization value.
    self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers
    self.ref_exclusion_selection = ""
    number_of_ccd_trials = 0
    # print "logic expr outcome:", (number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001)
    # print number_of_ccd_trials < 10
    # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers
    if self.berkeley_p_before_minimization_rama_outliers <= 0.001:
        print >> self.log, "No ramachandran outliers, skipping CCD step."
    if not self.params.enabled:
        print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
    # Keep going while trials remain, outliers exceed 0.001 % and the
    # procedure is enabled.
    # NOTE(review): the "after minimization" percentage appears to be
    # refreshed only in the minimize_whole branch -- confirm.
    while (number_of_ccd_trials < self.params.number_of_ccd_trials
           and self.berkeley_p_after_minimiaztion_rama_outliers > 0.001
           and self.params.enabled):
        # NOTE(review): this message goes to stdout, not to self.log.
        print "CCD try number, outliers:", number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
        number_of_ccd_trials += 1
        processed_chain_ids = []
        for chain in self.resulting_pdb_h.only_model().chains():
            print >> self.log, "Idealizing chain %s" % chain.id
            # A chain id is handled once per trial, even if it occurs in
            # several chain objects.
            if chain.id not in processed_chain_ids:
                processed_chain_ids.append(chain.id)
            else:
                continue
            selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
            sel = asc.selection("chain %s" % chain.id)
            chain_h = self.resulting_pdb_h.select(sel)
            m = chain_h.only_model()
            i = 0
            cutted_chain_h = None
            # Keep the first chain carrying this id; remove the rest from
            # the selected sub-hierarchy.
            for c in m.chains():
                if i == 0:
                    cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(
                        c)
                else:
                    print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                    print >> self.log, " Removing chain %s with %d residues" % (
                        c.id, len(c.residues()))
                    m.remove_chain(c)
                i += 1
            exclusions, ch_h = self.idealize_chain(
                hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
            if ch_h is not None:
                # Copy the idealized coordinates back onto this chain.
                set_xyz_smart(
                    # dest_h=self.resulting_pdb_h,
                    dest_h=chain,
                    source_h=ch_h)
                # Residues reported by idealize_chain are excluded from
                # the selection string.
                for resnum in exclusions:
                    selection += " and not resseq %s" % resnum
            self.ref_exclusion_selection += "(%s) or " % selection
        #
        # dumping and reloading hierarchy to do proper rounding of coordinates
        self.resulting_pdb_h = iotbx.pdb.input(
            source_info=None,
            lines=self.resulting_pdb_h.as_pdb_string(
            )).construct_hierarchy()
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        self.berkeley_p_before_minimization_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        # Drop the last three characters ("or ") of the joined selection.
        # NOTE(review): the string is not reset between trials, so a later
        # trial appends to text whose " or " joiner was trimmed -- confirm
        # this is intended.
        if len(self.ref_exclusion_selection) > 0:
            self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
        # self.resulting_pdb_h.write_pdb_file(file_name="%s_before_minimization.pdb" % self.params.output_prefix)
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_before_minimization_rama_outliers = ram.out_percent

        # Cross-check the two outlier counters against each other.
        duke_count = ram.get_outliers_count_and_fraction()[0]
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count

        if self.params.minimize_whole:
            print >> self.log, "minimizing whole thing..."
            print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
            # print >> sel
            # Without a map use the Ramachandran-oriented wrapper;
            # otherwise minimize against the map.
            if self.reference_map is None:
                minimize_wrapper_for_ramachandran(
                    hierarchy=self.resulting_pdb_h,
                    xrs=self.xrs,
                    original_pdb_h=self.original_pdb_h,
                    excl_string_selection=self.ref_exclusion_selection,
                    grm=self.grm,
                    log=None,
                    ss_annotation=self.secondary_structure_annotation)
            else:
                mwwm = minimize_wrapper_with_map(
                    pdb_h=self.resulting_pdb_h,
                    xrs=self.xrs,
                    target_map=self.reference_map,
                    grm=self.grm,
                    ss_annotation=self.secondary_structure_annotation,
                    log=self.log)
            # self.resulting_pdb_h.write_pdb_file(file_name="%s_all_minized.pdb" % self.params.output_prefix)
            ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
            self.p_after_minimiaztion_rama_outliers = ram.out_percent
            berkeley_count = utils.list_rama_outliers_h(
                self.resulting_pdb_h).count("\n")
            duke_count = ram.get_outliers_count_and_fraction()[0]
            self.berkeley_p_after_minimiaztion_rama_outliers = \
                berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
        else:
            print >> self.log, "Number of Rama outliers after min:", berkeley_count
def ramalyze_parallel(hierarchy):
    """Return ``ramalyze`` run on *hierarchy* with ``null_out()`` as ``out``.

    A plain single-argument function wrapping the call, with the output
    stream argument filled in.
    """
    result = ramalyze(hierarchy, out=null_out())
    return result
def __init__(self,
             pdb_hierarchy,
             xray_structure=None,
             fmodel=None,
             fmodel_neutron=None,
             geometry_restraints_manager=None,
             crystal_symmetry=None,
             sequences=None,
             flags=None,
             header_info=None,
             raw_data=None,
             unmerged_data=None,
             all_chain_proxies=None,
             keep_hydrogens=True,
             nuclear=False,
             save_probe_unformatted_file=None,
             show_hydrogen_outliers=False,
             min_cc_two_fofc=0.8,
             n_bins_data=10,
             count_anomalous_pairs_separately=False,
             use_internal_variance=True,
             outliers_only=True,
             use_pdb_header_resolution_cutoffs=False,
             file_name=None,
             ligand_selection=None,
             rotamer_library="8000",
             map_params=None):
    """Run every analysis enabled in *flags* and keep the results on self.

    All names in ``__slots__`` are first set to None; each enabled analysis
    then overwrites its own attribute (``ramalyze``, ``omegalyze``,
    ``rotalyze``, ``cbetadev``, ``nqh_flips``, ``rna``, ``clashes``,
    ``model_stats``, ``restraints``, ``sequence``, ``real_space``,
    ``waters``, ``data_stats``, ``merging``, ``xtriage``,
    ``neutron_stats``, ``_multi_criterion``).

    Parameters (as used by this method):
      pdb_hierarchy: model to validate; its atom i_seqs are reset.
      xray_structure: taken from ``fmodel`` or built from the hierarchy
        and ``crystal_symmetry`` when not supplied.
      fmodel / fmodel_neutron: sources for data-dependent statistics.
      geometry_restraints_manager: needed for RNA and restraint analyses.
      flags: ``molprobity_flags()`` is used when None.
      header_info: supplies ``d_min``/``d_max`` when
        ``use_pdb_header_resolution_cutoffs`` is true.
      map_params: when it names a map file, or map coefficients with a
        label, real-space and water analyses use the maps rather than
        ``fmodel``.
      rotamer_library: must be "8000".
      show_hydrogen_outliers, file_name: accepted but not read here.
    """
    assert rotamer_library == "8000", "data_version given to RotamerEval not recognized."
    for slot_name in self.__slots__:
        setattr(self, slot_name, None)
    # very important - the i_seq attributes may be extracted later
    pdb_hierarchy.atoms().reset_i_seq()
    self.pdb_hierarchy = pdb_hierarchy

    # Obtain an xray structure from whatever source is available.
    if xray_structure is None and fmodel is not None:
        xray_structure = fmodel.xray_structure
    elif xray_structure is None and crystal_symmetry is not None:
        xray_structure = pdb_hierarchy.extract_xray_structure(
            crystal_symmetry=crystal_symmetry)

    self.crystal_symmetry = crystal_symmetry
    if crystal_symmetry is None and fmodel is not None:
        self.crystal_symmetry = fmodel.f_obs().crystal_symmetry()
    self.header_info = header_info
    if flags is None:
        flags = molprobity_flags()

    # Protein-only analyses.
    if pdb_hierarchy.contains_protein():
        self.find_missing_atoms(out=null_out())
        if flags.ramalyze:
            self.ramalyze = ramalyze.ramalyze(
                pdb_hierarchy=pdb_hierarchy,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.omegalyze:
            self.omegalyze = omegalyze.omegalyze(
                pdb_hierarchy=pdb_hierarchy,
                nontrans_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.rotalyze:
            self.rotalyze = rotalyze.rotalyze(
                pdb_hierarchy=pdb_hierarchy,
                data_version=rotamer_library,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.cbetadev:
            self.cbetadev = cbetadev.cbetadev(
                pdb_hierarchy=pdb_hierarchy,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.nqh:
            self.nqh_flips = clashscore.nqh_flips(
                pdb_hierarchy=pdb_hierarchy)

    # RNA analysis needs the suitename module and a restraints manager.
    if (pdb_hierarchy.contains_rna()
            and flags.rna
            and libtbx.env.has_module(name="suitename")
            and geometry_restraints_manager is not None):
        self.rna = rna_validate.rna_validation(
            pdb_hierarchy=pdb_hierarchy,
            geometry_restraints_manager=geometry_restraints_manager,
            outliers_only=outliers_only,
            params=None)

    if flags.clashscore:
        self.clashes = clashscore.clashscore(
            pdb_hierarchy=pdb_hierarchy,
            save_probe_unformatted_file=save_probe_unformatted_file,
            nuclear=nuclear,
            keep_hydrogens=keep_hydrogens,
            out=null_out(),
            verbose=False)

    if flags.model_stats and xray_structure is not None:
        self.model_stats = model_properties.model_statistics(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            all_chain_proxies=all_chain_proxies,
            ignore_hd=(not nuclear),
            ligand_selection=ligand_selection)

    if geometry_restraints_manager is not None and flags.restraints:
        assert xray_structure is not None
        self.restraints = restraints.combined(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            geometry_restraints_manager=geometry_restraints_manager,
            ignore_hd=(not nuclear),
            cdl=getattr(all_chain_proxies, "use_cdl", None))

    if sequences is not None and flags.seq:
        self.sequence = sequence.validation(
            pdb_hierarchy=pdb_hierarchy,
            sequences=sequences,
            log=null_out(),
            include_secondary_structure=True,
            extract_coordinates=True)

    # use maps (fmodel is not used)
    use_maps = False
    if map_params is not None:
        map_inputs = map_params.input.maps
        use_maps = (map_inputs.map_file_name
                    or (map_inputs.map_coefficients_file_name
                        and map_inputs.map_coefficients_label))
    if use_maps:
        if flags.real_space:
            self.real_space = experimental.real_space(
                fmodel=None,
                pdb_hierarchy=pdb_hierarchy,
                cc_min=min_cc_two_fofc,
                molprobity_map_params=map_params.input.maps)
        if flags.waters:
            self.waters = waters.waters(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=xray_structure,
                fmodel=None,
                collect_all=True,
                molprobity_map_params=map_params.input.maps)

    # Data-dependent statistics.
    if fmodel is not None:
        if use_pdb_header_resolution_cutoffs and header_info is not None:
            fmodel = fmodel.resolution_filter(
                d_min=header_info.d_min,
                d_max=header_info.d_max)
        if flags.rfactors:
            self.data_stats = experimental.data_statistics(
                fmodel,
                raw_data=raw_data,
                n_bins=n_bins_data,
                count_anomalous_pairs_separately=count_anomalous_pairs_separately)
        # if maps are used, keep previous results
        if not use_maps:
            if flags.real_space:
                self.real_space = experimental.real_space(
                    fmodel=fmodel,
                    pdb_hierarchy=pdb_hierarchy,
                    cc_min=min_cc_two_fofc)
            if flags.waters:
                self.waters = waters.waters(
                    pdb_hierarchy=pdb_hierarchy,
                    xray_structure=xray_structure,
                    fmodel=fmodel,
                    collect_all=True)
        if unmerged_data is not None:
            self.merging = experimental.merging_and_model_statistics(
                f_obs=fmodel.f_obs(),
                f_model=fmodel.f_model(),
                r_free_flags=fmodel.r_free_flags(),
                unmerged_i_obs=unmerged_data,
                anomalous=count_anomalous_pairs_separately,
                use_internal_variance=use_internal_variance,
                n_bins=n_bins_data)
        if flags.xtriage:
            import mmtbx.scaling.xtriage
            calc_amplitudes = abs(
                fmodel.f_model()).set_observation_type_xray_amplitude()
            if raw_data is None:
                observed = fmodel.f_obs()
            else:
                calc_amplitudes, observed = calc_amplitudes.common_sets(
                    other=raw_data)
            self.xtriage = mmtbx.scaling.xtriage.xtriage_analyses(
                miller_obs=observed,
                miller_calc=calc_amplitudes,
                unmerged_obs=unmerged_data,  # XXX some redundancy here...
                text_out=null_out())

    if fmodel_neutron is not None and flags.rfactors:
        self.neutron_stats = experimental.data_statistics(
            fmodel_neutron,
            n_bins=n_bins_data,
            count_anomalous_pairs_separately=False)
    if pdb_hierarchy.models_size() == 1:
        self._multi_criterion = multi_criterion_view(pdb_hierarchy)
def get_model_stat(file_name):
    """Collect geometry and validation statistics for one model file.

    The coordinates are re-centered in a box built with a buffer layer of
    5, the model is processed with the module-level ``mon_lib_srv`` and
    ``ener_lib``, and the validation tools are run on the hierarchy.

    Returns None when ``all_single_atom_residues`` is true for the
    hierarchy; otherwise a ``group_args`` holding bond/angle RMSD values,
    the count of nonbonded distances below 0.5, Ramachandran, rotamer,
    C-beta and omega statistics, clashscore, MolProbity score,
    (min, max, mean) of occupancies and B-factors, and the atom count.
    """
    pdb_inp = iotbx.pdb.input(file_name=file_name)
    atoms = pdb_inp.atoms()
    # Put the model in the center of a non-crystallographic box.
    box = uctbx.non_crystallographic_unit_cell_with_the_sites_in_its_center(
        sites_cart=atoms.extract_xyz(),
        buffer_layer=5)
    atoms.set_xyz(new_xyz=box.sites_cart)
    hierarchy = pdb_inp.construct_hierarchy()
    if all_single_atom_residues(ph=hierarchy):
        return None
    pdb_text = hierarchy.as_pdb_string(
        crystal_symmetry=box.crystal_symmetry())
    pdb_records = pdb_text.splitlines()
    #
    interpretation = monomer_library.pdb_interpretation
    params = interpretation.master_params.extract()
    params.clash_guard.nonbonded_distance_threshold = None
    params.disable_uc_volume_vs_n_atoms_check = False
    params.use_neutron_distances = True
    params.restraints_library.cdl = False
    processed = interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        raw_records=pdb_records,
        params=params,
        log=null_out())
    xrs = processed.xray_structure()
    # Hydrogens count as present if either H or D scatterers occur.
    scatterer_types = xrs.scattering_type_registry().type_count_dict().keys()
    has_hd = "H" in scatterer_types or "D" in scatterer_types
    restraints_manager = processed.geometry_restraints_manager(
        show_energies=False,
        assume_hydrogens_all_missing=not has_hd,
        plain_pairs_radius=5.0)
    a_mean, b_mean = get_bonds_angles_rmsd(
        restraints_manager=restraints_manager, xrs=xrs)
    energies_sites = restraints_manager.energies_sites(
        sites_cart=xrs.sites_cart(),
        compute_gradients=False)
    distances = energies_sites.nonbonded_distances()
    worst_clash_count = (distances < 0.5).count(True)
    #
    rama = ramalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    rama_outliers = rama.percent_outliers
    rota_outliers = rotalyze(
        pdb_hierarchy=hierarchy, outliers_only=False).percent_outliers
    cbeta_outlier_count = cbetadev(
        pdb_hierarchy=hierarchy,
        outliers_only=True,
        out=null_out()).get_outlier_count()
    omega = omegalyze.omegalyze(pdb_hierarchy=hierarchy, quiet=True)
    cis_pro = omega.n_cis_proline()
    cis_gen = omega.n_cis_general()
    twisted_pro = omega.n_twisted_proline()
    twisted_gen = omega.n_twisted_general()
    #
    clash_value = clashscore(pdb_hierarchy=hierarchy).get_clashscore()
    mp_value = molprobity_score(
        clashscore=clash_value,
        rota_out=rota_outliers,
        rama_fav=rama.percent_favored)
    #
    occupancies = atoms.extract_occ()
    b_factors = atoms.extract_b()
    #
    return group_args(
        b_mean=b_mean,
        a_mean=a_mean,
        number_of_worst_clashes=worst_clash_count,
        ramachandran_outliers=rama_outliers,
        rotamer_outliers=rota_outliers,
        c_beta_dev=cbeta_outlier_count,
        n_cis_proline=cis_pro,
        n_cis_general=cis_gen,
        n_twisted_proline=twisted_pro,
        n_twisted_general=twisted_gen,
        o=occupancies.min_max_mean().as_tuple(),
        b=b_factors.min_max_mean().as_tuple(),
        mpscore=mp_value,
        clsc=clash_value,
        n_atoms=atoms.size())
def make_multikin(f, processed_pdb_file, pdbID=None, keep_hydrogens=False):
    """Write a multi-criterion validation kinemage to the file *f*.

    Parameters:
      f: path of the output file; it is opened for writing (truncated).
      processed_pdb_file: object providing
        ``all_chain_proxies.pdb_hierarchy``,
        ``all_chain_proxies.sites_cart`` and
        ``geometry_restraints_manager()``.
      pdbID: label used in the ``@group``/``@subgroup`` lines and passed
        to the per-chain helpers; "PDB" when None.
      keep_hydrogens: forwarded to ``make_probe_dots``.

    Returns:
      *f*, unchanged.
    """
    if pdbID is None:
        pdbID = "PDB"
    hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
    i_seq_name_hash = build_name_hash(pdb_hierarchy=hierarchy)
    sites_cart = processed_pdb_file.all_chain_proxies.sites_cart
    geometry = processed_pdb_file.geometry_restraints_manager()
    flags = geometry_restraints.flags.flags(default=True)
    angle_proxies = geometry.angle_proxies
    pair_proxies = geometry.pair_proxies(flags=flags, sites_cart=sites_cart)
    bond_proxies = pair_proxies.bond_proxies

    # Map first i_seq -> bonded i_seqs, keeping only bonds whose two atoms
    # agree on characters 9..13 of their name-hash entries (presumably
    # the residue-identifying part -- confirm against build_name_hash).
    quick_bond_hash = {}
    for bp in bond_proxies.simple:
        i_seq_0 = bp.i_seqs[0]
        i_seq_1 = bp.i_seqs[1]
        if i_seq_name_hash[i_seq_0][9:14] == i_seq_name_hash[i_seq_1][9:14]:
            quick_bond_hash.setdefault(i_seq_0, []).append(i_seq_1)

    # Collect the kinemage pieces in order and join once at the end
    # instead of growing a string with repeated concatenation.
    kin_parts = [get_default_header()]
    altid_controls = get_altid_controls(hierarchy=hierarchy)
    if altid_controls != "":
        kin_parts.append(altid_controls)
    kin_parts.append("@group {%s} dominant animate\n" % pdbID)
    initiated_chains = []
    rot_outliers = rotalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    cb = cbetadev(
        pdb_hierarchy=hierarchy,
        outliers_only=True)
    rama = ramalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    counter = 0  # running chain index handed to get_kin_lots
    for model in hierarchy.models():
        for chain in model.chains():
            # One @subgroup header per distinct chain id.
            if chain.id not in initiated_chains:
                kin_parts.append(
                    "@subgroup {%s} dominant master= {chain %s}\n" % (
                        pdbID, chain.id))
                initiated_chains.append(chain.id)
            kin_parts.append(get_kin_lots(chain=chain,
                                          bond_hash=quick_bond_hash,
                                          i_seq_name_hash=i_seq_name_hash,
                                          pdbID=pdbID,
                                          index=counter))
            if chain.is_protein():
                kin_parts.append(rotamer_outliers(chain=chain,
                                                  pdbID=pdbID,
                                                  rot_outliers=rot_outliers))
                kin_parts.append(rama_outliers(chain=chain,
                                               pdbID=pdbID,
                                               ram_outliers=rama))
            # TODO use central methods in mmtbx.validation.restraints
            kin_parts.append(get_angle_outliers(angle_proxies=angle_proxies,
                                                chain=chain,
                                                sites_cart=sites_cart,
                                                hierarchy=hierarchy))
            kin_parts.append(get_bond_outliers(bond_proxies=bond_proxies,
                                               chain=chain,
                                               sites_cart=sites_cart,
                                               hierarchy=hierarchy))
            if chain.is_protein():
                kin_parts.append(cbeta_dev(chain_id=chain.id,
                                           outliers=cb.results))
            kin_parts.append(pperp_outliers(hierarchy=hierarchy,
                                            chain=chain))
            counter += 1
    kin_parts.append(omegalyze.omegalyze(pdb_hierarchy=hierarchy,
                                         nontrans_only=True,
                                         out=None,
                                         quiet=False).as_kinemage())
    kin_parts.append(make_probe_dots(hierarchy=hierarchy,
                                     keep_hydrogens=keep_hydrogens))
    kin_parts.append(get_footer())
    kin_out = "".join(kin_parts)

    # open() exists on Python 2 and 3, unlike file(). The context manager
    # closes the handle even if the write fails, and the text is written
    # in one call rather than one character at a time.
    with open(f, 'w') as outfile:
        outfile.write(kin_out)
    return f
def __init__(self,
             pdb_hierarchy,
             xray_structure=None,
             fmodel=None,
             fmodel_neutron=None,
             geometry_restraints_manager=None,
             crystal_symmetry=None,
             sequences=None,
             flags=None,
             header_info=None,
             raw_data=None,
             unmerged_data=None,
             all_chain_proxies=None,
             keep_hydrogens=True,
             nuclear=False,
             save_probe_unformatted_file=None,
             show_hydrogen_outliers=False,
             min_cc_two_fofc=0.8,
             n_bins_data=10,
             count_anomalous_pairs_separately=False,
             use_internal_variance=True,
             outliers_only=True,
             use_pdb_header_resolution_cutoffs=False,
             file_name=None,
             ligand_selection=None,
             rotamer_library="8000",
             map_params=None):
    """Run the validation analyses selected by *flags*; store them on self.

    Every name in ``__slots__`` starts as None and is replaced only when
    the corresponding analysis is enabled and its inputs are available.

    Parameters (as used by this method):
      pdb_hierarchy: model to validate; its atom i_seqs are reset.
      xray_structure: falls back to ``fmodel.xray_structure`` or to a
        structure extracted with ``crystal_symmetry``.
      fmodel / fmodel_neutron: used for R-factor, real-space, water,
        merging and xtriage statistics.
      geometry_restraints_manager: required by the RNA and restraints
        analyses.
      flags: defaults to ``molprobity_flags()``.
      header_info: provides ``d_min``/``d_max`` for the optional
        resolution filter.
      map_params: if it names a map file, or map coefficients with a
        label, maps are used for real-space and water analyses.
      rotamer_library: must be "8000".
      show_hydrogen_outliers, file_name: accepted but not read here.
    """
    assert rotamer_library == "8000", "data_version given to RotamerEval not recognized."
    for attr in self.__slots__:
        setattr(self, attr, None)
    # very important - the i_seq attributes may be extracted later
    pdb_hierarchy.atoms().reset_i_seq()
    self.pdb_hierarchy = pdb_hierarchy

    if xray_structure is None:
        if fmodel is not None:
            xray_structure = fmodel.xray_structure
        elif crystal_symmetry is not None:
            xray_structure = pdb_hierarchy.extract_xray_structure(
                crystal_symmetry=crystal_symmetry)

    # Prefer the explicit symmetry; otherwise borrow it from the data.
    if crystal_symmetry is None and fmodel is not None:
        self.crystal_symmetry = fmodel.f_obs().crystal_symmetry()
    else:
        self.crystal_symmetry = crystal_symmetry
    self.header_info = header_info
    if flags is None:
        flags = molprobity_flags()

    # Protein-only analyses.
    if pdb_hierarchy.contains_protein():
        if flags.ramalyze:
            self.ramalyze = ramalyze.ramalyze(
                pdb_hierarchy=pdb_hierarchy,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.omegalyze:
            self.omegalyze = omegalyze.omegalyze(
                pdb_hierarchy=pdb_hierarchy,
                nontrans_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.rotalyze:
            self.rotalyze = rotalyze.rotalyze(
                pdb_hierarchy=pdb_hierarchy,
                data_version=rotamer_library,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.cbetadev:
            self.cbetadev = cbetadev.cbetadev(
                pdb_hierarchy=pdb_hierarchy,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.nqh:
            self.nqh_flips = clashscore.nqh_flips(
                pdb_hierarchy=pdb_hierarchy)

    rna_requested = (pdb_hierarchy.contains_rna()
                     and flags.rna
                     and libtbx.env.has_module(name="suitename"))
    if rna_requested and geometry_restraints_manager is not None:
        self.rna = rna_validate.rna_validation(
            pdb_hierarchy=pdb_hierarchy,
            geometry_restraints_manager=geometry_restraints_manager,
            outliers_only=outliers_only,
            params=None)

    if flags.clashscore:
        self.clashes = clashscore.clashscore(
            pdb_hierarchy=pdb_hierarchy,
            save_probe_unformatted_file=save_probe_unformatted_file,
            nuclear=nuclear,
            keep_hydrogens=keep_hydrogens,
            out=null_out(),
            verbose=False)

    if flags.model_stats and xray_structure is not None:
        self.model_stats = model_properties.model_statistics(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            all_chain_proxies=all_chain_proxies,
            ignore_hd=(not nuclear),
            ligand_selection=ligand_selection)

    if geometry_restraints_manager is not None and flags.restraints:
        assert xray_structure is not None
        self.restraints = restraints.combined(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            geometry_restraints_manager=geometry_restraints_manager,
            ignore_hd=(not nuclear),
            cdl=getattr(all_chain_proxies, "use_cdl", None))

    if sequences is not None and flags.seq:
        self.sequence = sequence.validation(
            pdb_hierarchy=pdb_hierarchy,
            sequences=sequences,
            log=null_out(),
            include_secondary_structure=True,
            extract_coordinates=True)

    # use maps (fmodel is not used)
    use_maps = False
    if map_params is not None:
        maps_scope = map_params.input.maps
        use_maps = (maps_scope.map_file_name
                    or (maps_scope.map_coefficients_file_name
                        and maps_scope.map_coefficients_label))
    if use_maps:
        if flags.real_space:
            self.real_space = experimental.real_space(
                fmodel=None,
                pdb_hierarchy=pdb_hierarchy,
                cc_min=min_cc_two_fofc,
                molprobity_map_params=map_params.input.maps)
        if flags.waters:
            self.waters = waters.waters(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=xray_structure,
                fmodel=None,
                collect_all=True,
                molprobity_map_params=map_params.input.maps)

    if fmodel is not None:
        if use_pdb_header_resolution_cutoffs and header_info is not None:
            fmodel = fmodel.resolution_filter(
                d_min=header_info.d_min,
                d_max=header_info.d_max)
        if flags.rfactors:
            self.data_stats = experimental.data_statistics(
                fmodel,
                raw_data=raw_data,
                n_bins=n_bins_data,
                count_anomalous_pairs_separately=count_anomalous_pairs_separately)
        # if maps are used, keep previous results
        if not use_maps:
            if flags.real_space:
                self.real_space = experimental.real_space(
                    fmodel=fmodel,
                    pdb_hierarchy=pdb_hierarchy,
                    cc_min=min_cc_two_fofc)
            if flags.waters:
                self.waters = waters.waters(
                    pdb_hierarchy=pdb_hierarchy,
                    xray_structure=xray_structure,
                    fmodel=fmodel,
                    collect_all=True)
        if unmerged_data is not None:
            self.merging = experimental.merging_and_model_statistics(
                f_obs=fmodel.f_obs(),
                f_model=fmodel.f_model(),
                r_free_flags=fmodel.r_free_flags(),
                unmerged_i_obs=unmerged_data,
                anomalous=count_anomalous_pairs_separately,
                use_internal_variance=use_internal_variance,
                n_bins=n_bins_data)
        if flags.xtriage:
            import mmtbx.scaling.xtriage
            model_amplitudes = abs(
                fmodel.f_model()).set_observation_type_xray_amplitude()
            if raw_data is None:
                obs_data = fmodel.f_obs()
            else:
                model_amplitudes, obs_data = model_amplitudes.common_sets(
                    other=raw_data)
            self.xtriage = mmtbx.scaling.xtriage.xtriage_analyses(
                miller_obs=obs_data,
                miller_calc=model_amplitudes,
                unmerged_obs=unmerged_data,  # XXX some redundancy here...
                text_out=null_out())

    if fmodel_neutron is not None and flags.rfactors:
        self.neutron_stats = experimental.data_statistics(
            fmodel_neutron,
            n_bins=n_bins_data,
            count_anomalous_pairs_separately=False)
    if pdb_hierarchy.models_size() == 1:
        self._multi_criterion = multi_criterion_view(pdb_hierarchy)
def __init__(
        self,
        pdb_hierarchy,
        restraints_manager,
        molprobity_scores=False,
        n_histogram_slots=10,
        cdl_restraints=False,
        ignore_hydrogens=False,  # only used by amber
        ):
    """Gather geometry-restraint deviation statistics for a model.

    Parameters:
      pdb_hierarchy: model whose atom coordinates are evaluated.
      restraints_manager: provides ``energies_sites``; its ``geometry``
        attribute is used for the delta histograms when it exists.
      molprobity_scores: when true, also compute Ramachandran, rotamer,
        C-beta, clashscore and MolProbity score values.
      n_histogram_slots: number of slots in each histogram.
      cdl_restraints: stored as ``self.cdl_restraints``.
      ignore_hydrogens: forwarded as ``ignore_hd`` on the amber path only.

    For each restraint type b, a, c, p, ll, d, n the (min, max, mean)
    triple is also exposed as ``<type>_min``, ``<type>_max`` and
    ``<type>_mean``. The method returns early, leaving ``a`` and ``b`` as
    None, when the energies object has no ``angle_deviations``.
    """
    # (suffix, index into the deviation triple)
    value_fields = (("mean", 2), ("max", 1), ("min", 0))
    restraint_kinds = ("b", "a", "c", "p", "ll", "d", "n")

    self.cdl_restraints = cdl_restraints
    sites_cart = pdb_hierarchy.atoms().extract_xyz()
    energies_sites = restraints_manager.energies_sites(
        sites_cart=sites_cart,
        compute_gradients=False)
    # molprobity scores
    self.clashscore = None
    self.ramachandran_outliers = None
    self.ramachandran_allowed = None
    self.ramachandran_favored = None
    self.rotamer_outliers = None
    self.c_beta_dev = None
    self.mpscore = None
    if molprobity_scores:
        rama = ramalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False)
        self.ramalyze_obj = rama
        self.ramachandran_outliers = rama.percent_outliers
        self.ramachandran_allowed = rama.percent_allowed
        self.ramachandran_favored = rama.percent_favored
        rota = rotalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False)
        self.rotalyze_obj = rota
        self.rotamer_outliers = rota.percent_outliers
        cbeta = cbetadev(
            pdb_hierarchy=pdb_hierarchy,
            outliers_only=True,
            out=null_out())
        self.cbetadev_obj = cbeta
        self.c_beta_dev = cbeta.get_outlier_count()
        self.clashscore = clashscore(
            pdb_hierarchy=pdb_hierarchy).get_clashscore()
        self.mpscore = molprobity_score(
            clashscore=self.clashscore,
            rota_out=self.rotamer_outliers,
            rama_fav=self.ramachandran_favored)
    #
    # Use the nested geometry energies when the object has them.
    if hasattr(energies_sites, "geometry"):
        esg = energies_sites.geometry
    else:
        esg = energies_sites
    self.a = None
    self.b = None
    if not hasattr(esg, "angle_deviations"):
        return

    if hasattr(esg, "amber"):
        # Amber path: only bond and angle statistics are available.
        amber_parm = restraints_manager.amber_structs.parm
        self.a, angle_deltas = esg.angle_deviations(
            sites_cart, amber_parm,
            ignore_hd=ignore_hydrogens, get_deltas=True)
        self.b, bond_deltas = esg.bond_deviations(
            sites_cart, amber_parm,
            ignore_hd=ignore_hydrogens, get_deltas=True)
        self.a_number = esg.n_angle_proxies(
            amber_parm, ignore_hd=ignore_hydrogens)
        self.b_number = esg.n_bond_proxies(
            amber_parm, ignore_hd=ignore_hydrogens)
        self.c = None
        self.p = None
        self.ll = None
        self.d = None
        self.n = None
        self.c_number = 0
        self.p_number = 0
        self.d_number = 0
        self.bond_deltas_histogram = flex.histogram(
            data=flex.abs(bond_deltas), n_slots=n_histogram_slots)
        self.angle_deltas_histogram = flex.histogram(
            data=flex.abs(angle_deltas), n_slots=n_histogram_slots)
        # nonbonded_distances = esg.nonbonded_distances()
        # self.nonbonded_distances_histogram = flex.histogram(
        #   data = flex.abs(nonbonded_distances), n_slots = n_histogram_slots)
        for kind in restraint_kinds:
            triple = getattr(self, kind)
            for suffix, position in value_fields:
                attr_name = "%s_%s" % (kind, suffix)
                if triple is None:
                    # Unavailable restraint types get explicit None fields.
                    setattr(self, attr_name, None)
                else:
                    setattr(self, attr_name, triple[position])
        return

    self.a = esg.angle_deviations()
    self.b = esg.bond_deviations()
    self.a_number = esg.get_filtered_n_angle_proxies()
    self.b_number = esg.get_filtered_n_bond_proxies()
    self.c = esg.chirality_deviations()
    self.d = esg.dihedral_deviations()
    self.p = esg.planarity_deviations()
    self.ll = esg.parallelity_deviations()
    self.n = esg.nonbonded_deviations()
    self.c_number = esg.n_chirality_proxies
    self.d_number = esg.n_dihedral_proxies
    self.p_number = esg.n_planarity_proxies
    self.n_number = esg.n_nonbonded_proxies
    #
    for kind in restraint_kinds:
        triple = getattr(self, kind)
        if triple is None:
            continue
        for suffix, position in value_fields:
            setattr(self, "%s_%s" % (kind, suffix), triple[position])
    #
    if hasattr(restraints_manager, "geometry"):
        rmg = restraints_manager.geometry
    else:
        rmg = restraints_manager
    bond_deltas = geometry_restraints.bond_deltas(
        sites_cart=sites_cart,
        sorted_asu_proxies=rmg.pair_proxies().bond_proxies)
    angle_deltas = geometry_restraints.angle_deltas(
        sites_cart=sites_cart,
        proxies=rmg.angle_proxies)
    nonbonded_distances = esg.nonbonded_distances()
    self.bond_deltas_histogram = flex.histogram(
        data=flex.abs(bond_deltas), n_slots=n_histogram_slots)
    self.angle_deltas_histogram = flex.histogram(
        data=flex.abs(angle_deltas), n_slots=n_histogram_slots)
    self.nonbonded_distances_histogram = flex.histogram(
        data=flex.abs(nonbonded_distances), n_slots=n_histogram_slots)
    #
    # The total target must equal the sum of its per-type residuals.
    residual_total = (
        esg.angle_residual_sum +
        esg.bond_residual_sum +
        esg.chirality_residual_sum +
        esg.dihedral_residual_sum +
        esg.nonbonded_residual_sum +
        esg.planarity_residual_sum +
        esg.parallelity_residual_sum +
        esg.reference_coordinate_residual_sum +
        esg.reference_dihedral_residual_sum +
        esg.ncs_dihedral_residual_sum +
        esg.den_residual_sum +
        esg.ramachandran_residual_sum)
    assert approx_equal(esg.target, residual_total)
    del energies_sites, esg  # we accumulate this object, so make it clean asap