def __init__(self, model, n_histogram_slots=10, file_name=None, selection=None):
    """Gather B-factor and anisotropy statistics for several atom subsets.

    Attribute suffixes name the subset the numbers refer to:
    ``_a`` all scatterers, ``_a_noH`` all non-H/D scatterers,
    ``_s_noH`` non-H/D solvent, ``_m_noH`` non-H/D non-solvent and
    ``_h`` H/D scatterers only.

    For each subset the following are stored: ``b_min/b_max/b_mean``
    (from ``self.mmmd`` applied to the u_iso/u_equiv values with the
    8*pi**2 scale), ``a_min/a_max/a_mean`` (from ``self.mmmd`` applied to
    the anisotropy of the scatterers flagged ``use_u_aniso``), and the
    ``n_aniso`` / ``n_iso`` counts.  Histograms are built for the
    non-H/D subset only, using ``n_histogram_slots`` slots.
    """
    self.wilson_b = model.wilson_b
    self.file_name = file_name
    self.selection = selection
    self.rms_b_iso_or_b_equiv_bonded = model.rms_b_iso_or_b_equiv_bonded()

    u_to_b = math.pi**2*8
    structure = model.xray_structure
    is_solvent = model.solvent_selection()
    is_hd = structure.hd_selection()
    not_hd = ~is_hd
    cell = structure.unit_cell()

    def summarize(xs):
        # All per-subset quantities are derived the same way; collect them once.
        u_values = xs.extract_u_iso_or_u_equiv()
        aniso_flags = xs.use_u_aniso()
        anisotropy = xs.scatterers().anisotropy(unit_cell=cell).select(aniso_flags)
        return {
            "u": u_values,
            "anisotropy": anisotropy,
            "b": self.mmmd(u_values, u_to_b),
            "a": self.mmmd(anisotropy),
            "n_aniso": aniso_flags.count(True),
            "n_iso": xs.use_u_iso().count(True),
        }

    everything = summarize(structure)
    heavy = summarize(structure.select(not_hd))
    solvent_heavy = summarize(structure.select(is_solvent & not_hd))
    macro_heavy = summarize(structure.select((~is_solvent) & not_hd))
    hydrogens = summarize(structure.select(is_hd))

    # B-factor min / max / mean
    self.b_min_a, self.b_max_a, self.b_mean_a = everything["b"]
    self.b_min_a_noH, self.b_max_a_noH, self.b_mean_a_noH = heavy["b"]
    self.b_min_s_noH, self.b_max_s_noH, self.b_mean_s_noH = solvent_heavy["b"]
    self.b_min_m_noH, self.b_max_m_noH, self.b_mean_m_noH = macro_heavy["b"]
    self.b_min_h, self.b_max_h, self.b_mean_h = hydrogens["b"]

    # Counts of anisotropic / isotropic scatterers
    self.n_aniso_a = everything["n_aniso"]
    self.n_aniso_a_noH = heavy["n_aniso"]
    self.n_aniso_s_noH = solvent_heavy["n_aniso"]
    self.n_aniso_m_noH = macro_heavy["n_aniso"]
    self.n_aniso_h = hydrogens["n_aniso"]
    self.n_iso_a = everything["n_iso"]
    self.n_iso_a_noH = heavy["n_iso"]
    self.n_iso_s_noH = solvent_heavy["n_iso"]
    self.n_iso_m_noH = macro_heavy["n_iso"]
    self.n_iso_h = hydrogens["n_iso"]

    # Anisotropy min / max / mean
    self.a_min_a, self.a_max_a, self.a_mean_a = everything["a"]
    self.a_min_a_noH, self.a_max_a_noH, self.a_mean_a_noH = heavy["a"]
    self.a_min_s_noH, self.a_max_s_noH, self.a_mean_s_noH = solvent_heavy["a"]
    self.a_min_m_noH, self.a_max_m_noH, self.a_mean_m_noH = macro_heavy["a"]
    self.a_min_h, self.a_max_h, self.a_mean_h = hydrogens["a"]

    # Histograms are only kept for the non-H/D subset.
    b_heavy = heavy["u"] * u_to_b
    self.b_a_noH_histogram = flex.histogram(
        data=b_heavy, n_slots=n_histogram_slots)
    self.b_a_noH = b_heavy  # need this for phenix gui
    self.a_a_noH_histogram = flex.histogram(
        data=heavy["anisotropy"], n_slots=n_histogram_slots)

    self._show_anisotropy = everything["n_aniso"]
def run(args): for file_name in args: print "File name:", file_name try: pdb_inp = iotbx.pdb.input(file_name=file_name) except KeyboardInterrupt: raise except Exception: libtbx.utils.format_exception() isotropic_b_factors = flex.double() all_eigenvalues = flex.double() for atom in pdb_inp.atoms(): if (atom.uij == (-1, -1, -1, -1, -1, -1)): isotropic_b_factors.append(atom.b) else: all_eigenvalues.extend( flex.double(adptbx.eigenvalues(atom.uij))) all_eigenvalues *= adptbx.u_as_b(1) print "Number of isotropic atoms: ", isotropic_b_factors.size() print "Number of anisotropic atoms:", all_eigenvalues.size() // 3 if (isotropic_b_factors.size() != 0): print "Histogram of isotropic B-factors:" flex.histogram(data=isotropic_b_factors, n_slots=10).show(prefix=" ", format_cutoffs="%7.2f") if (all_eigenvalues.size() != 0): print "Histogram of eigenvalues of anisotropic B-factors:" flex.histogram(data=all_eigenvalues, n_slots=10).show(prefix=" ", format_cutoffs="%7.2f") print
def run(args): for file_name in args: print "File name:", file_name try: pdb_inp = iotbx.pdb.input(file_name=file_name) except KeyboardInterrupt: raise except Exception: libtbx.utils.format_exception() isotropic_b_factors = flex.double() all_eigenvalues = flex.double() for atom in pdb_inp.atoms(): if (atom.uij == (-1,-1,-1,-1,-1,-1)): isotropic_b_factors.append(atom.b) else: all_eigenvalues.extend(flex.double(adptbx.eigenvalues(atom.uij))) all_eigenvalues *= adptbx.u_as_b(1) print "Number of isotropic atoms: ", isotropic_b_factors.size() print "Number of anisotropic atoms:", all_eigenvalues.size() // 3 if (isotropic_b_factors.size() != 0): print "Histogram of isotropic B-factors:" flex.histogram(data=isotropic_b_factors, n_slots=10).show( prefix=" ", format_cutoffs="%7.2f") if (all_eigenvalues.size() != 0): print "Histogram of eigenvalues of anisotropic B-factors:" flex.histogram(data=all_eigenvalues, n_slots=10).show( prefix=" ", format_cutoffs="%7.2f") print
def exercise_03(mon_lib_srv, ener_lib, verbose=0):
    """Run cartesian dynamics on a real model and check for displacement outliers.

    The test is skipped (with a message) when the 2ERL_noH.pdb regression
    file cannot be found.  After 200 dynamics steps the per-atom distance
    between the starting and final coordinates is computed; any atom that
    moved more than 5.3 times the overall rms difference is listed and a
    RuntimeError is raised.
    """
    pdb_file = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/2ERL_noH.pdb",
        test=os.path.isfile)
    if pdb_file is None:
        print("Skipping exercise_03: input file not available")
        return
    # Quiet runs collect the log in memory instead of printing it.
    log = sys.stdout if verbose else StringIO()

    params = mmtbx.monomer_library.pdb_interpretation.master_params.extract()
    params.nonbonded_weight = 16
    processed_pdb = mmtbx.monomer_library.pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        params=params,
        ener_lib=ener_lib,
        file_name=pdb_file,
        log=log)
    xray_structure = processed_pdb.xray_structure()
    geometry = processed_pdb.geometry_restraints_manager()
    restraints_manager = mmtbx.restraints.manager(geometry=geometry)
    # Snapshot of the starting coordinates, taken before dynamics runs.
    structure_ = xray_structure.deep_copy_scatterers()

    gradients_calculator = \
        cartesian_dynamics.gradients_calculator_reciprocal_space(
            restraints_manager=restraints_manager,
            sites_cart=xray_structure.sites_cart(),
            wc=1)
    cartesian_dynamics.run(
        xray_structure=xray_structure,
        gradients_calculator=gradients_calculator,
        temperature=300,
        n_steps=200,
        time_step=0.0005,
        log=log,
        verbose=1)

    rms = xray_structure.rms_difference(structure_)
    rms_reversed = structure_.rms_difference(xray_structure)
    assert rms == rms_reversed
    if verbose:
        print("rms between structures before and after dynamics = ", rms)

    shifts = flex.sqrt(
        structure_.difference_vectors_cart(xray_structure).dot())
    if verbose:
        flex.histogram(data=shifts, n_slots=12).show(format_cutoffs="%6.4f")

    n_rms = 5.3
    is_outlier = shifts > n_rms * rms
    outlier_sc = xray_structure.scatterers().select(is_outlier)
    if outlier_sc.size() == 0:
        return
    print("number of rms outliers:", outlier_sc.size())
    outlier_d = shifts.select(is_outlier)
    for scatterer, distance in zip(outlier_sc, outlier_d):
        print(scatterer.label, distance)
    raise RuntimeError("rms outliers.")
def report(O, plot=None, xy_prefix=None): from cctbx.array_family import flex print "Number of shots:", O.completeness_history.size() - 1 print print "Histogram of counts per reflection:" flex.histogram(O.counts.as_double(), n_slots=8).show(prefix=" ", format_cutoffs="%7.0f") print print "Observations per reflection:" flex.show_count_stats(counts=O.counts, prefix=" ") print " Median:", int(flex.median(O.counts.as_double()) + 0.5) print sys.stdout.flush() if (xy_prefix is None): xy_prefix = "" elif (len(xy_prefix) != 0): xy_prefix = xy_prefix + "_" def dump_xy(name, array): f = open(xy_prefix + "%s.xy" % name, "w") for i, c in enumerate(array): print >> f, i, c dump_xy("completeness_history", O.completeness_history) dump_xy("min_count_history", O.min_count_history) if (O.use_symmetry): _ = O.i_calc.asu else: _ = O.i_calc.p1_anom _ = _.customized_copy(data=O.counts).sort(by_value="resolution") sym_factors = _.space_group().order_p() if (not O.i_calc.asu.anomalous_flag()): sym_factors *= 2 sym_factors /= _.multiplicities().data() counts_sorted_by_resolution = _.data().as_int() * sym_factors dump_xy("counts_sorted_by_resolution", counts_sorted_by_resolution) dump_xy("d_spacings_sorted_by_resolution", _.d_spacings().data()) if (plot == "completeness"): from libtbx import pyplot fig = pyplot.figure() ax = fig.add_subplot(1, 1, 1) _ = O.completeness_history nx = _.size() ax.plot(range(nx), _, "r-") ax.axis([0, nx, 0, 1]) pyplot.show() elif (plot == "redundancy"): from libtbx import pyplot fig = pyplot.figure() ax = fig.add_subplot(1, 1, 1) _ = counts_sorted_by_resolution ax.plot(range(len(_)), _, "r-") ax.axis([-_.size() * 0.05, _.size() * 1.05, 0, None]) pyplot.show() elif (plot is not None): raise RuntimeError('Unknown plot type: "%s"' % plot)
def report(O, plot=None, xy_prefix=None): from cctbx.array_family import flex print "Number of shots:", O.completeness_history.size()-1 print print "Histogram of counts per reflection:" flex.histogram(O.counts.as_double(), n_slots=8).show( prefix=" ", format_cutoffs="%7.0f") print print "Observations per reflection:" flex.show_count_stats(counts=O.counts, prefix=" ") print " Median:", int(flex.median(O.counts.as_double())+0.5) print sys.stdout.flush() if (xy_prefix is None): xy_prefix = "" elif (len(xy_prefix) != 0): xy_prefix = xy_prefix + "_" def dump_xy(name, array): f = open(xy_prefix + "%s.xy" % name, "w") for i,c in enumerate(array): print >> f, i, c dump_xy("completeness_history", O.completeness_history) dump_xy("min_count_history", O.min_count_history) if (O.use_symmetry): _ = O.i_calc.asu else: _ = O.i_calc.p1_anom _ = _.customized_copy(data=O.counts).sort(by_value="resolution") sym_factors = _.space_group().order_p() if (not O.i_calc.asu.anomalous_flag()): sym_factors *= 2 sym_factors /= _.multiplicities().data() counts_sorted_by_resolution = _.data().as_int() * sym_factors dump_xy("counts_sorted_by_resolution", counts_sorted_by_resolution) dump_xy("d_spacings_sorted_by_resolution", _.d_spacings().data()) if (plot == "completeness"): from libtbx import pyplot fig = pyplot.figure() ax = fig.add_subplot(1, 1, 1) _ = O.completeness_history nx = _.size() ax.plot(range(nx), _, "r-") ax.axis([0, nx, 0, 1]) pyplot.show() elif (plot == "redundancy"): from libtbx import pyplot fig = pyplot.figure() ax = fig.add_subplot(1, 1, 1) _ = counts_sorted_by_resolution ax.plot(range(len(_)), _, "r-") ax.axis([-_.size()*0.05, _.size()*1.05, 0, None]) pyplot.show() elif (plot is not None): raise RuntimeError('Unknown plot type: "%s"' % plot)
def exercise_03(mon_lib_srv, ener_lib, verbose=0): # # normal run with real model # pdb_file = libtbx.env.find_in_repositories(relative_path="phenix_regression/pdb/2ERL_noH.pdb", test=os.path.isfile) if pdb_file is None: print "Skipping exercise_03: input file not available" return if verbose: log = sys.stdout else: log = StringIO() params = mmtbx.monomer_library.pdb_interpretation.master_params.extract() params.nonbonded_weight = 16 processed_pdb = mmtbx.monomer_library.pdb_interpretation.process( mon_lib_srv=mon_lib_srv, params=params, ener_lib=ener_lib, file_name=pdb_file, log=log ) xray_structure = processed_pdb.xray_structure() restraints_manager = mmtbx.restraints.manager(geometry=processed_pdb.geometry_restraints_manager()) structure_ = xray_structure.deep_copy_scatterers() gradients_calculator = cartesian_dynamics.gradients_calculator_reciprocal_space( restraints_manager=restraints_manager, sites_cart=xray_structure.sites_cart(), wc=1 ) cartesian_dynamics.run( xray_structure=xray_structure, gradients_calculator=gradients_calculator, temperature=300, n_steps=200, time_step=0.0005, log=log, verbose=1, ) rms1 = xray_structure.rms_difference(structure_) rms2 = structure_.rms_difference(xray_structure) assert rms1 == rms2 rms = rms1 if verbose: print "rms between structures before and after dynamics = ", rms array_of_distances_between_each_atom = flex.sqrt(structure_.difference_vectors_cart(xray_structure).dot()) if verbose: flex.histogram(data=array_of_distances_between_each_atom, n_slots=12).show(format_cutoffs="%6.4f") n_rms = 5.3 selected_by_rms = array_of_distances_between_each_atom > n_rms * rms outlier_sc = xray_structure.scatterers().select(selected_by_rms) if outlier_sc.size() != 0: print "number of rms outliers:", outlier_sc.size() outlier_d = array_of_distances_between_each_atom.select(selected_by_rms) for sc, d in zip(outlier_sc, outlier_d): print sc.label, d raise RuntimeError("rms outliers.")
def density_truncation(self):
    """Clamp map values inside the protein region to histogram-derived cutoffs.

    Does nothing when both ``fraction_min`` and ``fraction_max`` are None.
    ``fraction_min`` set to ``Auto`` is replaced by
    ``mean_protein_density - f000_over_v``.  Cutoffs come from a
    10000-slot histogram of the protein-region map values; values beyond a
    cutoff are set to that cutoff, the percentage of affected grid points
    is recorded, and ``mean_protein_density`` is recomputed at the end.
    """
    settings = self.params.density_truncation
    min_fraction = settings.fraction_min
    max_fraction = settings.fraction_max
    if min_fraction is None and max_fraction is None:
        return
    if min_fraction is Auto:
        min_fraction = self.mean_protein_density - self.f000_over_v

    protein_values = self.map.select(self.protein_iselection)
    hist = flex.histogram(protein_values, n_slots=10000)

    if max_fraction is not None:
        n_above = int(self.n_protein_grid_points * (1 - max_fraction))
        self.truncate_max = hist.get_cutoff(n_above)
        too_high = (self.map > self.truncate_max) & self.protein_selection
        self.map.set_selected(too_high, self.truncate_max)
        self.truncate_max_percent = (
            too_high.count(True) / self.n_protein_grid_points) * 100

    if min_fraction is not None:
        n_above = int(self.n_protein_grid_points * (1 - min_fraction))
        self.truncate_min = hist.get_cutoff(n_above)
        too_low = (self.map < self.truncate_min) & self.protein_selection
        self.map.set_selected(too_low, self.truncate_min)
        self.truncate_min_percent = (
            too_low.count(True) / self.n_protein_grid_points) * 100

    # The map may have been modified above, so refresh the cached mean.
    self.mean_protein_density = flex.mean(
        self.map.select(self.protein_iselection))
def __init__(self, pdb_hierarchy, xray_structure, use_hydrogens=False, geometry_restraints_manager=None):
    """Compute B-factor statistics for the whole model and standard subsets.

    Unless ``use_hydrogens`` is true, H/D atoms are removed from the
    hierarchy, the structure and (when given) the restraints manager before
    anything is computed.  Statistics are gathered for the selections
    "all", "protein", "nucleotide", "element H or element D", "water",
    "not (water or nucleotide or protein)" and for each chain id, plus a
    10-slot histogram of all B values.  Everything is stored in
    ``self._result``; a selection that matches no atoms yields None.
    """
    if (not use_hydrogens):
        not_hd_sel = ~xray_structure.hd_selection()
        pdb_hierarchy = pdb_hierarchy.select(not_hd_sel)
        xray_structure = xray_structure.select(not_hd_sel)
        if (geometry_restraints_manager is not None):
            geometry_restraints_manager = \
                geometry_restraints_manager.select(not_hd_sel)
    b_isos = xray_structure.extract_u_iso_or_u_equiv() * adptbx.u_as_b(1.)
    sites_cart = xray_structure.sites_cart()
    asc = pdb_hierarchy.atom_selection_cache()

    def get_stats(sel_str, rms_bonded=False):
        # Statistics for one atom selection string; None if nothing matches.
        sel = asc.selection(sel_str)
        if (sel.count(True) == 0):
            # Bail out before doing any per-selection work.
            return None
        xrs = xray_structure.select(sel)
        n_iso = xrs.use_u_iso().count(True)
        n_aniso = xrs.use_u_aniso().count(True)
        b_isos_selected = b_isos.select(sel)
        mi, ma, me = b_isos_selected.min_max_mean().as_tuple()
        rms_b_iso_bonded = None
        if (rms_bonded and geometry_restraints_manager is not None):
            # The restraints manager is selected once here; it used to be
            # selected twice with the first result thrown away.
            rms_b_iso_bonded = rms_b_iso_or_b_equiv_bonded(
                geometry_restraints_manager=
                    geometry_restraints_manager.select(sel),
                sites_cart=sites_cart.select(sel),
                b_isos=b_isos_selected)
        return group_args(
            min=mi,
            max=ma,
            mean=me,
            n_iso=n_iso,
            n_aniso=n_aniso,
            # Count of B values below 0.01, reported as "zero".
            n_zero=(b_isos_selected < 0.01).count(True),
            rms_b_iso_bonded=rms_b_iso_bonded)

    overall = get_stats(sel_str="all", rms_bonded=True)
    protein = get_stats(sel_str="protein", rms_bonded=True)
    nucleotide = get_stats(sel_str="nucleotide", rms_bonded=True)
    hd = get_stats(sel_str="element H or element D")
    water = get_stats(sel_str="water")
    other = get_stats(sel_str="not (water or nucleotide or protein)")
    chains = {}
    for chain in pdb_hierarchy.chains():
        chains[chain.id] = get_stats(sel_str="chain '%s'" % chain.id)
    histogram = flex.histogram(data=b_isos, n_slots=10)
    self._result = group_args(
        overall=overall,
        protein=protein,
        nucleotide=nucleotide,
        hd=hd,
        water=water,
        other=other,
        chains=chains,
        histogram=histogram)
def show_histogram(data, n_slots=50, out=None, prefix=""):
    """Write summary statistics and a slot-by-slot histogram of ``data``.

    Output goes to ``out`` (``sys.stdout`` when None), headed by
    ``prefix``.  The ``flex.histogram`` object is returned.
    """
    if out is None:
        out = sys.stdout
    print('\n' + prefix, file=out)

    # Stats
    stats = scitbx.math.basic_statistics(data)
    summary = (
        ('\n Number : %7.4f ', stats.n),
        (' Min : %7.4f ', stats.min),
        (' Max : %7.4f ', stats.max),
        (' Mean : %7.4f ', stats.mean),
        (' Stdev : %7.4f ', stats.biased_standard_deviation),
        (' Skew : %7.4f ', stats.skew),
        (' Sum : %7.4f ', stats.sum),
    )
    for template, value in summary:
        print(template % value, file=out)

    # Histo
    histogram = flex.histogram(data=data, n_slots=n_slots)
    origin = histogram.data_min()
    width = histogram.slot_width()
    lower = origin
    for upper_index, count in enumerate(histogram.slots(), 1):
        upper = origin + width * upper_index
        print("%7.3f - %7.3f: %d" % (lower, upper, count), file=out)
        lower = upper
    out.flush()
    return histogram
def estimate_d_c(Dij):
    '''Estimate d_c from the distribution of the Dij values.

    The assumption is that each cluster is gaussian distributed in its Dij
    values.  The values are histogrammed with a slot count that is the
    largest Dij rounded up to the next multiple of 10, the slots are
    averaged in groups of 10, and the first group whose average is no more
    than twice the smallest group average is taken as the gap after the
    cluster near 0.0.  The returned value is that group index times 10.

    Returns 1.0 when the slot count comes out as 10 (largest Dij below 10).
    '''
    from scitbx.array_family import flex
    dij_values = Dij.as_1d()
    # Rounding off to closest multiple of 10
    n_slots = (int(max(dij_values)) // 10 + 1) * 10
    if n_slots == 10:
        return 1.0
    hist_data = flex.histogram(dij_values, n_slots=n_slots)
    # Divide the data further into bins and see if there are dead zones
    # with data on either side; that indicates 2+ clusters.
    counts = hist_data.slots()
    moving_avg_bin = [
        flex.mean(flex.double(list(counts[start:start + 10])))
        for start in range(0, n_slots, 10)
    ]
    # There has to be one cluster close to 0.0; take that as the reference
    # point and find out where the next cluster is.
    min_avg = min(moving_avg_bin) * 2.0
    for i, avg in enumerate(moving_avg_bin):
        if avg <= min_avg:
            return float(i * 10.0)
    return 1.0
def show_histogram(data, n_slots): hm = flex.histogram(data=data, n_slots=n_slots) lc_1 = hm.data_min() s_1 = enumerate(hm.slots()) for (i_1, n_1) in s_1: hc_1 = hm.data_min() + hm.slot_width() * (i_1 + 1) print "%10.3f - %-10.3f : %d" % (lc_1, hc_1, n_1) lc_1 = hc_1
def show_histogram(data, n_slots): hm = flex.histogram(data = data, n_slots = n_slots) lc_1 = hm.data_min() s_1 = enumerate(hm.slots()) for (i_1,n_1) in s_1: hc_1 = hm.data_min() + hm.slot_width() * (i_1+1) print "%10.3f - %-10.3f : %d" % (lc_1, hc_1, n_1) lc_1 = hc_1
def show_histogram(data, n_slots):
    """Print min, max and mean of ``data``, then a histogram in percent.

    Each histogram line shows the slot range and the share of all values
    (in percent of ``data.size()``) that fall into that slot.
    """
    print(flex.min(data), flex.max(data), flex.mean(data))
    histogram = flex.histogram(data=data, n_slots=n_slots)
    origin = histogram.data_min()
    width = histogram.slot_width()
    lower = origin
    for upper_index, count in enumerate(histogram.slots(), 1):
        upper = origin + width * upper_index
        percent = float(count)/(data.size())*100.
        print("%10.3f - %-10.3f : %10.2f" % (lower, upper, percent))
        lower = upper
def show_histogram(data, n_slots, log):
    """Write one "low - high : count" line per histogram slot of ``data`` to ``log``."""
    from cctbx.array_family import flex
    histogram = flex.histogram(data=data, n_slots=n_slots)
    origin = histogram.data_min()
    width = histogram.slot_width()
    lower = origin
    for upper_index, count in enumerate(histogram.slots(), 1):
        # Upper edge is always measured from the histogram origin.
        upper = origin + width * upper_index
        print >> log, "%10.3f - %-10.3f : %d" % (lower, upper, count)
        lower = upper
def show_histogram(data, n_slots): print flex.min(data), flex.max(data), flex.mean(data) hm = flex.histogram(data = data, n_slots = n_slots) lc_1 = hm.data_min() s_1 = enumerate(hm.slots()) for (i_1,n_1) in s_1: hc_1 = hm.data_min() + hm.slot_width() * (i_1+1) print "%10.3f - %-10.3f : %10.2f" % (lc_1, hc_1, float(n_1)/(data.size())*100.) lc_1 = hc_1
def show_histogram(data, n_slots, log):
    """Write one "low - high : count" line per histogram slot of ``data`` to ``log``."""
    from cctbx.array_family import flex
    histogram = flex.histogram(data=data, n_slots=n_slots)
    origin = histogram.data_min()
    width = histogram.slot_width()
    lower = origin
    for upper_index, count in enumerate(histogram.slots(), 1):
        # Upper edge is always measured from the histogram origin.
        upper = origin + width * upper_index
        print("%10.3f - %-10.3f : %d" % (lower, upper, count), file=log)
        lower = upper
def show_histogram(data, n_slots, out=None, prefix=""):
    """Write ``prefix`` and a slot-by-slot histogram of ``data`` to ``out``.

    ``out`` defaults to ``sys.stdout``.  The stream is flushed and the
    ``flex.histogram`` object is returned.
    """
    if out is None:
        out = sys.stdout
    print >> out, prefix
    histogram = flex.histogram(data=data, n_slots=n_slots)
    origin = histogram.data_min()
    width = histogram.slot_width()
    lower = origin
    for upper_index, count in enumerate(histogram.slots(), 1):
        upper = origin + width * upper_index
        print >> out, "%7.3f - %7.3f: %d" % (lower, upper, count)
        lower = upper
    out.flush()
    return histogram
def _show_each(edges):
    """Report range, mean, reference value and histogram for each edge array.

    ``ref_edges``, ``labels``, ``n_slots`` and ``out`` are taken from the
    enclosing scope.
    """
    for values, reference, label in zip(edges, ref_edges, labels):
        histogram = flex.histogram(values, n_slots=n_slots)
        lowest = flex.min(values)
        highest = flex.max(values)
        moments = flex.mean_and_variance(values)
        mean = moments.mean()
        sigma = moments.unweighted_sample_standard_deviation()
        print >> out, " %s edge" % label
        print >> out, " range: %6.2f - %.2f" % (lowest, highest)
        print >> out, " mean: %6.2f +/- %6.2f on N = %d" % (
            mean, sigma, values.size())
        print >> out, " reference: %6.2f" % reference
        histogram.show(f=out, prefix=" ", format_cutoffs="%6.2f")
        print >> out, ""
def unit_cell_histograms(crystals):
    """Return six 100-slot histograms, one per unit cell parameter.

    The parameters of every crystal's unit cell are collected column-wise
    (index 0..5 of ``get_unit_cell().parameters()``) before histogramming.
    """
    columns = [flex.double() for _ in range(6)]
    for crystal in crystals:
        cell_parameters = crystal.get_unit_cell().parameters()
        for index, column in enumerate(columns):
            column.append(cell_parameters[index])
    return [flex.histogram(column, n_slots=100) for column in columns]
def _show_each(edges):
    """Report range, mean, reference value and histogram for each edge array.

    ``ref_edges``, ``labels``, ``n_slots`` and ``out`` are taken from the
    enclosing scope.
    """
    for values, reference, label in zip(edges, ref_edges, labels):
        histogram = flex.histogram(values, n_slots=n_slots)
        lowest = flex.min(values)
        highest = flex.max(values)
        moments = flex.mean_and_variance(values)
        mean = moments.mean()
        sigma = moments.unweighted_sample_standard_deviation()
        print >> out, " %s edge" % label
        print >> out, " range: %6.2f - %.2f" % (lowest, highest)
        print >> out, " mean: %6.2f +/- %6.2f on N = %d" % (
            mean, sigma, values.size())
        print >> out, " reference: %6.2f" % reference
        histogram.show(f=out, prefix=" ", format_cutoffs="%6.2f")
        print >> out, ""
def shelxd_cc_hist(filename):
    """Print histograms of the CC values found in a shelxd log.

    Every line of ``filename`` that starts with " Try" contributes two
    numbers, read from fixed columns 31-36 ("all") and 38-43 ("weak").
    Both sets are histogrammed over -10..100 in 220 slots and one
    "slot_center all_count weak_count" line is printed per slot.
    """
    from cctbx.array_family import flex
    cc_all = flex.double()
    cc_weak = flex.double()
    # Context manager so the log file is closed once it has been read;
    # it used to be left open.
    with open(filename) as stream:
        for record in stream:
            if not record.startswith(" Try"):
                continue
            cc_all.append(float(record[31:36]))
            cc_weak.append(float(record[38:43]))
    h_all = flex.histogram(cc_all, n_slots=220, data_min=-10, data_max=100)
    h_weak = flex.histogram(cc_weak, n_slots=220, data_min=-10, data_max=100)
    for b, a, w in zip(h_all.slot_centers(), h_all.slots(), h_weak.slots()):
        print(b, a, w)
def spot_count_histogram(n_spots, n_bins=20, filename='n_spots_hist.png', log=False):
    """Save a bar chart of the distribution of ``n_spots`` to ``filename``.

    The values are binned into ``n_bins`` slots; ``log`` is passed on to
    ``pyplot.bar``.  The current figure is cleared after saving.
    """
    hist = flex.histogram(n_spots.as_double(), n_slots=n_bins)
    bar_colour = '#3498db'
    from matplotlib import pyplot
    centres = hist.slot_centers().as_numpy_array()
    heights = hist.slots().as_numpy_array()
    # Bars are drawn at 75% of the slot width.
    bar_width = 0.75*hist.slot_width()
    pyplot.bar(
        centres,
        heights,
        width=bar_width,
        align='center',
        color=bar_colour,
        edgecolor=bar_colour,
        log=log)
    pyplot.savefig(filename)
    pyplot.clf()
def show_histogram(data, n_slots, out=None, prefix=""):
    """Write ``prefix`` and a slot-by-slot histogram of ``data`` to ``out``.

    ``out`` defaults to ``sys.stdout``.  The stream is flushed and the
    ``flex.histogram`` object is returned.
    """
    if out is None:
        out = sys.stdout
    print >> out, prefix
    histogram = flex.histogram(data=data, n_slots=n_slots)
    origin = histogram.data_min()
    width = histogram.slot_width()
    lower = origin
    for upper_index, count in enumerate(histogram.slots(), 1):
        upper = origin + width * upper_index
        print >> out, "%7.3f - %7.3f: %d" % (lower, upper, count)
        lower = upper
    out.flush()
    return histogram
def plot_wij_histogram(self, plot_name=None):
    """Plot a log-scale, 50-slot histogram of the Wij matrix values.

    Nothing happens when ``self._weights`` is None.  The histogram is also
    written to the debug handle.  The figure is saved to ``plot_name`` when
    given, otherwise it is shown interactively.
    """
    if self._weights is None:
        return
    hist = flex.histogram(self.wij_matrix.as_1d(), n_slots=50)
    logger.debug('Histogram of Wij values:')
    hist.show(f=debug_handle)

    from matplotlib import pyplot as plt
    plt.figure(figsize=(10,8))
    plt.clf()
    plt.bar(hist.slot_centers(), hist.slots(), width=hist.slot_width())
    plt.yscale('log')
    plt.xlabel(r'$w_{ij}$')
    plt.ylabel('Frequency')
    if plot_name is None:
        plt.show()
    else:
        plt.savefig(plot_name)
def plot_rij_histogram(self, plot_name=None):
    """Plot a 100-slot histogram over [-1, 1] of the non-zero Rij values.

    The histogram is also written to the debug handle.  The figure is saved
    to ``plot_name`` at 300 dpi when given, otherwise it is shown
    interactively.
    """
    all_rij = self.rij_matrix.as_1d()
    nonzero_rij = all_rij.select(all_rij != 0)
    hist = flex.histogram(nonzero_rij, data_min=-1, data_max=1, n_slots=100)
    logger.debug('Histogram of Rij values:')
    hist.show(f=debug_handle)

    from matplotlib import pyplot as plt
    plt.figure(figsize=(10,8))
    plt.clf()
    plt.bar(hist.slot_centers(), hist.slots(), width=hist.slot_width())
    label_size = 24
    plt.xlabel(r'$r_{ij}$', size=label_size)
    plt.ylabel('Frequency', size=label_size)
    plt.tick_params(axis='both', which='both', labelsize=label_size)
    plt.tight_layout()
    if plot_name is None:
        plt.show()
    else:
        plt.savefig(plot_name, dpi=300)
def unit_cell_histograms(crystals):
    """Return six 100-slot unit cell parameter histograms; print modal and median cells.

    The modal cell takes, for each parameter, the centre of the most
    populated histogram slot; the median cell takes the median of the
    collected values.
    """
    columns = [flex.double() for _ in range(6)]
    for crystal in crystals:
        cell_parameters = crystal.get_unit_cell().parameters()
        for index, column in enumerate(columns):
            column.append(cell_parameters[index])
    histograms = [flex.histogram(column, n_slots=100) for column in columns]

    median_unit_cell = uctbx.unit_cell(
        [flex.median(column) for column in columns])
    modal_parameters = []
    for histogram in histograms:
        fullest_slot = flex.max_index(histogram.slots())
        modal_parameters.append(histogram.slot_centers()[fullest_slot])
    modal_unit_cell = uctbx.unit_cell(modal_parameters)

    print("Modal unit cell: %s" % str(modal_unit_cell))
    print("Median unit cell: %s" % str(median_unit_cell))
    return histograms
def density_truncation(self):
    """Clamp map values inside the protein region to histogram-derived cutoffs.

    Does nothing when both ``fraction_min`` and ``fraction_max`` are None.
    ``fraction_min`` set to ``Auto`` is replaced by
    ``mean_protein_density - f000_over_v``.  Cutoffs come from a
    10000-slot histogram of the protein-region map values; values beyond a
    cutoff are set to that cutoff, the percentage of affected grid points
    is recorded, and ``mean_protein_density`` is recomputed at the end.
    """
    settings = self.params.density_truncation
    min_fraction = settings.fraction_min
    max_fraction = settings.fraction_max
    if min_fraction is None and max_fraction is None:
        return
    if min_fraction is Auto:
        min_fraction = self.mean_protein_density-self.f000_over_v

    protein_values = self.map.select(self.protein_iselection)
    hist = flex.histogram(protein_values, n_slots=10000)

    if max_fraction is not None:
        n_above = int(self.n_protein_grid_points * (1-max_fraction))
        self.truncate_max = hist.get_cutoff(n_above)
        too_high = (self.map > self.truncate_max) & self.protein_selection
        self.map.set_selected(too_high, self.truncate_max)
        self.truncate_max_percent = (
            too_high.count(True) / self.n_protein_grid_points) * 100

    if min_fraction is not None:
        n_above = int(self.n_protein_grid_points * (1-min_fraction))
        self.truncate_min = hist.get_cutoff(n_above)
        too_low = (self.map < self.truncate_min) & self.protein_selection
        self.map.set_selected(too_low, self.truncate_min)
        self.truncate_min_percent = (
            too_low.count(True) / self.n_protein_grid_points) * 100

    # The map may have been modified above, so refresh the cached mean.
    self.mean_protein_density = flex.mean(
        self.map.select(self.protein_iselection))
def show_histogram(data, n_slots=50, out=None, prefix=""):
    """Write summary statistics and a slot-by-slot histogram of ``data``.

    Output goes to ``out`` (``sys.stdout`` when None), headed by
    ``prefix``.  The ``flex.histogram`` object is returned.
    """
    if out is None:
        out = sys.stdout
    print >> out, "\n" + prefix

    # Stats
    stats = scitbx.math.basic_statistics(data)
    summary = (
        ("\n Number : %7.4f ", stats.n),
        (" Min : %7.4f ", stats.min),
        (" Max : %7.4f ", stats.max),
        (" Mean : %7.4f ", stats.mean),
        (" Stdev : %7.4f ", stats.biased_standard_deviation),
        (" Skew : %7.4f ", stats.skew),
        (" Sum : %7.4f ", stats.sum),
    )
    for template, value in summary:
        print >> out, template % value

    # Histo
    histogram = flex.histogram(data=data, n_slots=n_slots)
    origin = histogram.data_min()
    width = histogram.slot_width()
    lower = origin
    for upper_index, count in enumerate(histogram.slots(), 1):
        upper = origin + width * upper_index
        print >> out, "%7.3f - %7.3f: %d" % (lower, upper, count)
        lower = upper
    out.flush()
    return histogram
def add_cells_and_files(self, cells, symm_str):
    """Build the "Input files" HTML section and write the report.

    Stores ``cells`` on the instance, renders (a) a table with one row per
    entry of ``cells`` and (b) one AmCharts column chart per displayed
    unit cell parameter, puts both into ``self.html_inputfiles`` and calls
    ``self.write_html()``.

    NOTE(review): ``cells`` is used as a mapping whose keys are shown in
    the "file" column and whose values are indexed 0..5 as a, b, c, alpha,
    beta, gamma -- confirm against the caller.  ``symm_str`` is handed to
    ``sgtbx.space_group_info``.
    """
    self.cells = cells
    # Table
    table_str = ""
    for idx, xac in enumerate(cells):
        cell = cells[xac]
        table_str += "<tr>\n"
        table_str += " <td>%.4d</td><td>%s</td>" % (idx+1, xac) # idx, file
        table_str += "".join(map(lambda x: "<td>%.2f</td>"%x, cell))
        table_str += "\n</tr>\n"
    # Hist
    cellconstr = CellConstraints(sgtbx.space_group_info(symm_str).group())
    # One flag per cell parameter: "a" is always charted, the others only
    # when the corresponding constraint query returns False.
    show_flags = (True, not cellconstr.is_b_equal_a(), not cellconstr.is_c_equal_a_b(), not cellconstr.is_angle_constrained("alpha"), not cellconstr.is_angle_constrained("beta"), not cellconstr.is_angle_constrained("gamma"))
    names = ("a", "b", "c", "α", "β", "γ")
    hist_str = ""
    label1 = ""
    # Header cells: the first three names go straight into hist_str, the
    # last three are kept in label1 and inserted between the two chart rows.
    for i, (name, show) in enumerate(zip(names, show_flags)):
        tmp = ""
        if i in (0,3): tmp += "<tr>"
        if show: tmp += "<th>%s</th>" % name
        if i in (2,5): tmp += "</tr>"
        if i < 3: hist_str += tmp
        else: label1 += tmp
    hist_str += "\n<tr>\n"
    for idx, (name, show) in enumerate(zip(names, show_flags)):
        # Start the second chart row (preceded by its header row) at index 3.
        if idx==3: hist_str += "</tr>" + label1 + "<tr>"
        if not show: continue
        vals = flex.double(map(lambda x: x[idx], cells.values()))
        if len(vals) == 0: continue
        # At least 30 slots; more when the value range exceeds 30 * 0.5.
        nslots = max(30, int((max(vals) - min(vals)) / 0.5))
        hist = flex.histogram(vals, n_slots=nslots)
        # x values are the slot centres.
        x_vals = map(lambda i: hist.data_min() + hist.slot_width() * (i+.5), xrange(len(hist.slots())))
        y_vals = hist.slots()
        hist_str += """ <td> <div id="chartdiv_cell%(idx)d" style="width: 500px; height: 400px;"></div> <script> var chart_cell%(idx)d = AmCharts.makeChart("chartdiv_cell%(idx)d", { "type": "serial", "theme": "none", "legend": { "useGraphSettings": true, "markerSize":12, "valueWidth":0, "verticalGap":0 }, "dataProvider": [%(data)s], "valueAxes": [{ "minorGridAlpha": 0.08, "minorGridEnabled": true, "position": "top", "axisAlpha":0 }], "graphs": [{ "balloonText": "[[category]]: [[value]]", "title": "%(name)s", "type": "column", "fillAlphas": 0.8, "valueField": "yval" }], "rotate": false, "categoryField": "xval", "categoryAxis": { "gridPosition": "start", 
"title": "" } }); </script> </td> """ % dict(idx=idx, name=name, data=",".join(map(lambda x: '{"xval":%.2f,"yval":%d}'%x, zip(x_vals,y_vals))) )
    hist_str += "</tr>"
    self.html_inputfiles = """ <h2>Input files</h2> %d files for merging in %s symmetry <h3>Unit cell histogram</h3> <table> %s </table> <h3>Files</h3> <a href="#" onClick="toggle_show('div-input-files'); return false;">Show/Hide</a> <div id="div-input-files" style="display:none;"> <table class="cells"> <tr> <th>idx</th> <th>file</th> <th>a</th> <th>b</th> <th>c</th> <th>α</th> <th>β</th> <th>γ</th> </tr> %s </table> </div> """ % (len(cells), symm_str, hist_str, table_str)
    self.write_html()
def __init__(
    self,
    pdb_hierarchy,
    restraints_manager,
    molprobity_scores=False,
    n_histogram_slots=10,
    cdl_restraints=False,
    ignore_hydrogens=False,  # only used by amber
    automatically_use_amber=True,
):
    """Collect geometry-restraint deviation statistics for a model.

    pdb_hierarchy: supplies the coordinates via ``atoms().extract_xyz()``
      and is forwarded to the base-class constructor.
    restraints_manager: must provide ``energies_sites(...)``; its
      ``geometry`` attribute is used when present.
    molprobity_scores: forwarded to the base-class constructor.
    n_histogram_slots: number of bins for the delta/distance histograms.
    cdl_restraints: stored on the instance unchanged.
    ignore_hydrogens: forwarded as ``ignore_hd`` on the amber path only.
    automatically_use_amber: when true and the energies object has an
      ``amber`` attribute, statistics come from the amber code path.

    Returns early, with ``a``, ``b``, ``angle_deltas`` and ``bond_deltas``
    left as None, when the energies object has no ``angle_deviations``.
    """
    super(geometry, self).__init__(
        pdb_hierarchy=pdb_hierarchy, molprobity_scores=molprobity_scores)
    self.cdl_restraints = cdl_restraints
    sites_cart = pdb_hierarchy.atoms().extract_xyz()
    energies_sites = restraints_manager.energies_sites(
        sites_cart=sites_cart, compute_gradients=False)
    if hasattr(energies_sites, "geometry"):
        esg = energies_sites.geometry
    else:
        esg = energies_sites
    self.a = self.b = None
    self.angle_deltas = self.bond_deltas = None
    if not hasattr(esg, "angle_deviations"):
        return

    def publish_min_max_mean(fill_missing):
        # Expose <type>_mean / <type>_max / <type>_min from the deviation
        # triples, which are indexed 0=min, 1=max, 2=mean.  A missing triple
        # yields None attributes only when fill_missing is set.
        for restraint_type in ("b", "a", "c", "p", "ll", "d", "n"):
            triple = getattr(self, restraint_type)
            for label, position in (("mean", 2), ("max", 1), ("min", 0)):
                attr_name = "%s_%s" % (restraint_type, label)
                if triple is not None:
                    setattr(self, attr_name, triple[position])
                elif fill_missing:
                    setattr(self, attr_name, None)

    if automatically_use_amber and hasattr(esg, "amber"):
        # Amber path: only bond and angle statistics are available.
        self.used_amber = True
        amber_parm = restraints_manager.amber_structs.parm
        self.a, angle_deltas = esg.angle_deviations(
            sites_cart, amber_parm,
            ignore_hd=ignore_hydrogens, get_deltas=True)
        self.b, bond_deltas = esg.bond_deviations(
            sites_cart, amber_parm,
            ignore_hd=ignore_hydrogens, get_deltas=True)
        self.a_number = esg.n_angle_proxies(
            amber_parm, ignore_hd=ignore_hydrogens)
        self.b_number = esg.n_bond_proxies(
            amber_parm, ignore_hd=ignore_hydrogens)
        self.c = self.p = self.ll = self.d = self.n = None
        self.c_number = 0
        self.p_number = 0
        self.d_number = 0
        self.bond_deltas_histogram = flex.histogram(
            data=flex.abs(bond_deltas), n_slots=n_histogram_slots)
        self.angle_deltas_histogram = flex.histogram(
            data=flex.abs(angle_deltas), n_slots=n_histogram_slots)
        # No nonbonded-distance histogram is produced on this path.
        publish_min_max_mean(fill_missing=True)
        return

    # Regular path: every restraint class is reported.
    self.a = esg.angle_deviations()
    self.b = esg.bond_deviations()
    self.a_number = esg.get_filtered_n_angle_proxies()
    self.b_number = esg.get_filtered_n_bond_proxies()
    self.c = esg.chirality_deviations()
    self.d = esg.dihedral_deviations()
    self.p = esg.planarity_deviations()
    self.ll = esg.parallelity_deviations()
    self.n = esg.nonbonded_deviations()
    self.c_number = esg.n_chirality_proxies
    self.d_number = esg.n_dihedral_proxies
    self.p_number = esg.n_planarity_proxies
    self.n_number = esg.n_nonbonded_proxies
    publish_min_max_mean(fill_missing=False)

    if hasattr(restraints_manager, "geometry"):
        rmg = restraints_manager.geometry
    else:
        rmg = restraints_manager
    self.bond_deltas = geometry_restraints.bond_deltas(
        sites_cart=sites_cart,
        sorted_asu_proxies=rmg.pair_proxies().bond_proxies)
    self.angle_deltas = geometry_restraints.angle_deltas(
        sites_cart=sites_cart, proxies=rmg.angle_proxies)
    self.nonbonded_distances = esg.nonbonded_distances()
    # Nonbonded distances below 0.5 are counted as the worst clashes.
    self.number_of_worst_clashes = (
        self.nonbonded_distances < 0.5).count(True)
    self.bond_deltas_histogram = flex.histogram(
        data=flex.abs(self.bond_deltas), n_slots=n_histogram_slots)
    self.angle_deltas_histogram = flex.histogram(
        data=flex.abs(self.angle_deltas), n_slots=n_histogram_slots)
    self.nonbonded_distances_histogram = flex.histogram(
        data=flex.abs(self.nonbonded_distances), n_slots=n_histogram_slots)

    # Consistency check: the total target must equal the sum of its parts.
    residual_total = (
        esg.angle_residual_sum +
        esg.bond_residual_sum +
        esg.chirality_residual_sum +
        esg.dihedral_residual_sum +
        esg.nonbonded_residual_sum +
        esg.planarity_residual_sum +
        esg.parallelity_residual_sum +
        esg.reference_coordinate_residual_sum +
        esg.reference_dihedral_residual_sum +
        esg.ncs_dihedral_residual_sum +
        esg.den_residual_sum +
        esg.ramachandran_residual_sum)
    assert approx_equal(esg.target, residual_total)
    # Instances of this class are accumulated, so drop the references to the
    # energies objects as soon as they are no longer needed.
    del energies_sites, esg
elif key == 'mapped_predictions': print key, data[key][0][0], "(only first shown of %d)"%len(data[key][0]) elif key == 'correction_vectors' and data[key] is not None and data[key][0] is not None: if data[key][0] is None: print key, "None" else: print key, data[key][0][0], "(only first shown)" elif key == "DATA": print key,"len=%d max=%f min=%f dimensions=%s"%(data[key].size(),flex.max(data[key]),flex.min(data[key]),str(data[key].focus())) elif key == "WAVELENGTH": print "WAVELENGTH", data[key], ", converted to eV:", 12398.4187/data[key] elif key == "applied_absorption_correction": print key, data[key] if doplots: c = data[key][0] hist = flex.histogram(c, n_slots=30) from matplotlib import pyplot as plt plt.scatter(hist.slot_centers(), hist.slots()) plt.show() obs = data['observations'][0] preds = data['mapped_predictions'][0] p1 = preds.select(c == 1.0) p2 = preds.select((c != 1.0) & (c <= 1.5)) plt.scatter(preds.parts()[1], preds.parts()[0], c='g') plt.scatter(p1.parts()[1], p1.parts()[0], c='b') plt.scatter(p2.parts()[1], p2.parts()[0], c='r') plt.show() else: print key, data[key]
def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None):
    """Flag observations that are unlikely given the model amplitudes.

    Sigma-A values are estimated from ``self.miller_obs``, ``f_model`` and
    ``self.r_free_flags``.  A likelihood-ratio outlier test is run on the
    normalised amplitudes and its standardised likelihood is converted to
    p-values.  A report, including a 15-slot histogram of the scaled LL-gain
    values, is printed to ``self.out``.

    f_model: calculated amplitudes, passed as ``miller_calc``.
    level: p-value threshold; an observation is kept when p > level.
    return_data: when false the boolean flag array is returned, otherwise
      ``self.miller_obs`` restricted to the kept observations.
    plot_out: forwarded to ``self.make_log_model``.

    ``self.r_free_flags`` must not be None; if no flag is True the flags
    are inverted and stored back on the instance.
    """
    assert self.r_free_flags is not None
    if self.r_free_flags.data().count(True) == 0:
        inverted = ~self.r_free_flags.data()
        self.r_free_flags = self.r_free_flags.array(data=inverted)

    estimator = sigmaa_estimation.sigmaa_estimator(
        miller_obs=self.miller_obs,
        miller_calc=f_model,
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=200,
        n_sampling_points=20,
        n_chebyshev_terms=13)
    estimator.show(out=self.out)
    sigmaa = estimator.sigmaa()
    obs_norm = abs(estimator.normalized_obs)
    calc_norm = estimator.normalized_calc

    # The amplitudes are already normalised, hence every epsilon is 1.
    unit_epsilon = flex.double(calc_norm.data().size(), 1.0)
    outlier_test = scaling.likelihood_ratio_outlier_test(
        f_obs=obs_norm.data(),
        sigma_obs=None,
        f_calc=calc_norm.data(),
        epsilon=unit_epsilon,
        centric=obs_norm.centric_flags().data(),
        alpha=sigmaa.data(),
        beta=1.0 - sigmaa.data() * sigmaa.data())
    # The next four results are not used further in this method.
    modes = outlier_test.posterior_mode()
    lik = outlier_test.log_likelihood()
    p_lik = outlier_test.posterior_mode_log_likelihood()
    s_der = outlier_test.posterior_mode_snd_der()
    ll_gain = outlier_test.standardized_likelihood()

    # The gain should never be below zero, but rounding can push it slightly
    # negative; clamp such entries to a tiny positive value before the sqrt.
    # NOTE(review): the clamp is applied to the ll_gain array itself, so if
    # set_selected works in place ll_gain is clamped too - confirm.
    eps = 1.0e-10
    too_small = flex.bool(ll_gain < eps)
    clamped = ll_gain.set_selected(too_small, eps)
    single_p = erf(flex.sqrt(clamped / 2.0))
    p_values = 1.0 - flex.pow(single_p, float(single_p.size()))

    # Keep the observations whose p-value exceeds the threshold.
    flags = self.miller_obs.customized_copy(data=flex.bool(p_values > level))
    ll_gain = self.miller_obs.customized_copy(data=ll_gain)
    p_values = self.miller_obs.customized_copy(data=p_values)

    preamble = (
        " Model based outlier rejection. \n"
        "------------------------------ Calculated amplitudes and estimated values of"
        " alpha and beta are used to compute the log-likelihood of the observed"
        " amplitude. The method is inspired by Read, Acta Cryst. (1999). D55,"
        " 1759-1764. Outliers are rejected on the basis of the assumption that a"
        " scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is"
        " distributed according to a Chi-square distribution (Q\" is equal to the"
        " second derivative of the log likelihood function of the mode of the"
        " distribution). The outlier threshold of the p-value relates to the p-value"
        " of the extreme value distribution of the chi-square distribution. "
    )

    flags.map_to_asu()
    ll_gain.map_to_asu()
    p_values.map_to_asu()
    reference_indices = self.miller_obs.indices()
    assert flags.indices().all_eq(reference_indices)
    assert ll_gain.indices().all_eq(self.miller_obs.indices())
    assert p_values.indices().all_eq(self.miller_obs.indices())

    log_message = self.make_log_model(
        preamble, flags, ll_gain, p_values,
        obs_norm, calc_norm, sigmaa, plot_out)

    # Assemble the whole report in memory, then emit it in one go.
    tmp_log = StringIO()
    print >> tmp_log, log_message
    print >> tmp_log
    print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
    print >> tmp_log, " Note: scaled (LL-gain) is approximately Chi-square distributed."
    print >> tmp_log
    print >> tmp_log, " scaled(LL-gain) Frequency"
    flex.histogram(ll_gain.data(), 15).show(f=tmp_log, format_cutoffs="%7.3f")
    print >> self.out, tmp_log.getvalue()

    if return_data:
        assert flags.indices().all_eq(self.miller_obs.indices())
        return self.miller_obs.select(flags.data())
    return flags
def run(args):
    """Compare partialities and summed intensities of two reflection tables.

    Exactly two reflection tables are expected.  The one whose maximum
    ``id`` is 0 is treated as the "full" dataset and the other as the
    "sliced" one.  Reflections flagged as integrated with positive summed
    intensity and variance are matched between the two tables by nearest
    neighbour on ``xyzcal.px`` (matches closer than 2 pixels are kept).
    Partiality counts and histograms are printed, and five PNG files are
    written to the current directory:

      sorted_partialities.png, partiality_histogram.png,
      partiality_full_vs_sliced.png, intensity_full_vs_sliced.png,
      partiality_ratio_vs_intensity_ratio.png

    args: command-line arguments (without the program name).  They are now
      forwarded to the option parser; previously the parameter was ignored
      and the parser read ``sys.argv`` itself.
    """
    from cctbx.array_family import flex
    from dials.util.options import OptionParser
    from dials.util.options import flatten_reflections
    import libtbx.load_env

    usage = "%s [options] reflections_1.pickle reflections_2.pickle" % (
        libtbx.env.dispatcher_name)

    parser = OptionParser(
        usage=usage, phil=phil_scope, read_reflections=True,
        epilog=help_message)

    params, options, args = parser.parse_args(
        args=args, show_diff_phil=True, return_unhandled=True)

    reflections = flatten_reflections(params.input.reflections)
    # Check the count before indexing so that wrong input fails with this
    # assertion rather than an IndexError.
    assert len(reflections) == 2

    # Put the dataset with id == 0 (the full one) first.
    if flex.max(reflections[0]["id"]) > 0:
        reflections = list(reversed(reflections))
    assert flex.max(reflections[0]["id"]) == 0

    partialities = []
    intensities = []
    xyz = []

    # only want fully-recorded reflections in full dataset
    # (partiality > 0.99 selection currently disabled)
    print(reflections[0].size())
    # only want partial reflections in sliced dataset
    # (partiality < 0.99 selection currently disabled)
    print(reflections[1].size())

    for refl in reflections:
        sel = refl.get_flags(refl.flags.integrated)
        sel &= refl["intensity.sum.value"] > 0
        sel &= refl["intensity.sum.variance"] > 0
        refl = refl.select(sel)
        intensities.append(refl["intensity.sum.value"])
        partialities.append(refl["partiality"])
        xyz.append(refl["xyzcal.px"])

    # Nearest-neighbour match of sliced reflections against the full ones.
    from annlib_ext import AnnAdaptor as ann_adaptor

    ann = ann_adaptor(xyz[0].as_double().as_1d(), 3)
    ann.query(xyz[1].as_double().as_1d())

    # presumably ann.distances holds squared distances - TODO confirm
    distances = flex.sqrt(ann.distances)
    matches = distances < 2  # pixels
    isel0 = flex.size_t(list(ann.nn.select(matches)))
    isel1 = flex.size_t(list(matches.iselection()))

    p0 = partialities[0].select(isel0)
    p1 = partialities[1].select(isel1)
    i0 = intensities[0].select(isel0)
    i1 = intensities[1].select(isel1)

    print((p0 > p1).count(True), (p0 < p1).count(True))

    h0 = flex.histogram(p0, data_min=0, data_max=1, n_slots=20)
    h1 = flex.histogram(p1, data_min=0, data_max=1, n_slots=20)
    h0.show()
    h1.show()

    from matplotlib import pyplot

    # Sorted partialities against their rank (full on top, sliced below).
    perm0 = flex.sort_permutation(p0)
    perm1 = flex.sort_permutation(p1)
    fig, axes = pyplot.subplots(nrows=2, sharex=True)
    axes[0].plot(p0.select(perm0), flex.int_range(p0.size()))
    axes[1].plot(p1.select(perm1), flex.int_range(p1.size()))
    axes[1].set_xlabel("Partiality")
    for ax in axes:
        ax.set_ylabel("Cumulative frequency")
        ax.set_yscale("log")
    pyplot.savefig("sorted_partialities.png")
    pyplot.clf()

    # Partiality histograms as bar charts.
    blue = "#3498db"
    fig, axes = pyplot.subplots(nrows=2, sharex=True)
    for ax, h in zip(axes, (h0, h1)):
        ax.bar(
            h.slot_centers(),
            h.slots(),
            width=h.slot_width(),
            align="center",
            color=blue,
            edgecolor=blue,
        )
    axes[1].set_xlabel("Partiality")
    for ax in axes:
        ax.set_ylabel("Frequency")
        ax.set_yscale("log")
    pyplot.savefig("partiality_histogram.png")
    pyplot.clf()

    pyplot.scatter(p0, p1, s=5, alpha=0.3, marker="+")
    pyplot.xlabel("Partiality (full)")
    pyplot.ylabel("Partiality (sliced)")
    pyplot.savefig("partiality_full_vs_sliced.png")
    pyplot.clf()

    pyplot.scatter(i0, i1, s=5, alpha=0.3, marker="+")
    pyplot.xlim(flex.min(i0), flex.max(i0))
    pyplot.ylim(flex.min(i1), flex.max(i1))
    pyplot.xlabel("Intensity (full)")
    pyplot.ylabel("Intensity (sliced)")
    pyplot.xscale("log")
    pyplot.yscale("log")
    pyplot.savefig("intensity_full_vs_sliced.png")
    pyplot.clf()

    # Index 1 is the sliced dataset and index 0 the full one, so both ratios
    # are sliced/full; the axis labels state exactly that.
    i_ratio = i1 / i0
    p_ratio = p1 / p0
    pyplot.scatter(p_ratio, i_ratio, s=5, alpha=0.3, marker="+")
    pyplot.ylim(flex.min(i_ratio), flex.max(i_ratio))
    pyplot.yscale("log")
    pyplot.xlabel("P(sliced)/P(full)")
    pyplot.ylabel("I(sliced)/I(full)")
    pyplot.savefig("partiality_ratio_vs_intensity_ratio.png")
    pyplot.clf()
def histogram(self, n_slots=10000):
    """Return a ``flex.histogram`` of the flattened map values.

    n_slots: number of histogram bins (default 10000).
    """
    flat_values = self.map.as_1d()
    return flex.histogram(data=flat_values, n_slots=n_slots)
def __init__(
    self,
    pdb_hierarchy,
    restraints_manager,
    molprobity_scores=False,
    n_histogram_slots=10,
    cdl_restraints=False,
    ignore_hydrogens=False,  # only used by amber
    automatically_use_amber=True,
):
    """Collect geometry-restraint deviation statistics for a model.

    pdb_hierarchy: supplies the coordinates via ``atoms().extract_xyz()``
      and is forwarded to the base-class constructor.
    restraints_manager: provides ``energies_sites(...)``; may be None, in
      which case only the base class is initialised.
    molprobity_scores: forwarded to the base-class constructor.
    n_histogram_slots: number of bins for the delta/distance histograms.
    cdl_restraints: stored on the instance unchanged.
    ignore_hydrogens: forwarded as ``ignore_hd`` on the amber path only.
    automatically_use_amber: when true and the energies object has an
      ``amber`` attribute, statistics come from the amber code path.

    Returns early, with ``a``, ``b``, ``angle_deltas`` and ``bond_deltas``
    left as None, when the energies object has no ``angle_deviations``.
    """
    super(geometry, self).__init__(
        pdb_hierarchy=pdb_hierarchy, molprobity_scores=molprobity_scores)
    # Without a restraints manager there is nothing to evaluate.
    if restraints_manager is None:
        return

    self.cdl_restraints = cdl_restraints
    sites_cart = pdb_hierarchy.atoms().extract_xyz()
    energies_sites = restraints_manager.energies_sites(
        sites_cart=sites_cart, compute_gradients=False)
    if hasattr(energies_sites, "geometry"):
        esg = energies_sites.geometry
    else:
        esg = energies_sites
    self.a = self.b = None
    self.angle_deltas = self.bond_deltas = None
    if not hasattr(esg, "angle_deviations"):
        return

    def export_summary(none_when_absent):
        # Expose <type>_mean / <type>_max / <type>_min from the deviation
        # triples, which are indexed 0=min, 1=max, 2=mean.  An absent triple
        # produces None attributes only when none_when_absent is set.
        for restraint_type in ("b", "a", "c", "p", "ll", "d", "n"):
            stats = getattr(self, restraint_type)
            for suffix, slot in (("mean", 2), ("max", 1), ("min", 0)):
                target = "%s_%s" % (restraint_type, suffix)
                if stats is not None:
                    setattr(self, target, stats[slot])
                elif none_when_absent:
                    setattr(self, target, None)

    def abs_histogram(values):
        # Histogram of absolute values with the requested number of bins.
        return flex.histogram(data=flex.abs(values), n_slots=n_histogram_slots)

    if automatically_use_amber and hasattr(esg, "amber"):
        # Amber path: only bond and angle statistics are available.
        self.used_amber = True
        amber_parm = restraints_manager.amber_structs.parm
        self.a, angle_deltas, angle_extremes = esg.angle_deviations(
            sites_cart,
            amber_parm,
            ignore_hd=ignore_hydrogens,
            get_deltas=True,
            get_extremes=True,
        )
        self.angle_extremes = angle_extremes
        self.b, bond_deltas, bond_extremes = esg.bond_deviations(
            sites_cart,
            amber_parm,
            ignore_hd=ignore_hydrogens,
            get_deltas=True,
            get_extremes=True,
        )
        self.bond_extremes = bond_extremes
        self.a_number = esg.n_angle_proxies(
            amber_parm, ignore_hd=ignore_hydrogens)
        self.b_number = esg.n_bond_proxies(
            amber_parm, ignore_hd=ignore_hydrogens)
        self.c = self.p = self.ll = self.d = self.n = None
        self.c_number = 0
        self.p_number = 0
        self.d_number = 0
        self.bond_deltas_histogram = abs_histogram(bond_deltas)
        self.angle_deltas_histogram = abs_histogram(angle_deltas)
        # No nonbonded-distance histogram is produced on this path.
        export_summary(none_when_absent=True)
        return

    # Regular path: every restraint class is reported.
    self.a = esg.angle_deviations()
    self.b = esg.bond_deviations()
    self.b_z = esg.bond_deviations_z()
    self.a_z = esg.angle_deviations_z()
    self.b_w = esg.bond_deviations_weighted()
    self.a_w = esg.angle_deviations_weighted()
    self.a_number = esg.get_filtered_n_angle_proxies()
    self.b_number = esg.get_filtered_n_bond_proxies()
    self.c = esg.chirality_deviations()
    self.d = esg.dihedral_deviations()
    self.p = esg.planarity_deviations()
    self.ll = esg.parallelity_deviations()
    self.n = esg.nonbonded_deviations()
    self.c_number = esg.n_chirality_proxies
    self.d_number = esg.n_dihedral_proxies
    self.p_number = esg.n_planarity_proxies
    self.n_number = esg.n_nonbonded_proxies
    export_summary(none_when_absent=False)

    if hasattr(restraints_manager, "geometry"):
        rmg = restraints_manager.geometry
    else:
        rmg = restraints_manager
    # The raw deltas stay local here; only their histograms are stored.
    bond_deltas = geometry_restraints.bond_deltas(
        sites_cart=sites_cart,
        sorted_asu_proxies=rmg.pair_proxies().bond_proxies)
    angle_deltas = geometry_restraints.angle_deltas(
        sites_cart=sites_cart, proxies=rmg.angle_proxies)
    nonbonded_distances = esg.nonbonded_distances()
    # Nonbonded distances below 0.5 are counted as the worst clashes.
    self.number_of_worst_clashes = (nonbonded_distances < 0.5).count(True)
    self.bond_deltas_histogram = abs_histogram(bond_deltas)
    self.angle_deltas_histogram = abs_histogram(angle_deltas)
    self.nonbonded_distances_histogram = abs_histogram(nonbonded_distances)

    # Consistency check: the total target must equal the sum of its parts.
    residual_total = (
        esg.angle_residual_sum +
        esg.bond_residual_sum +
        esg.chirality_residual_sum +
        esg.dihedral_residual_sum +
        esg.nonbonded_residual_sum +
        esg.planarity_residual_sum +
        esg.parallelity_residual_sum +
        esg.reference_coordinate_residual_sum +
        esg.reference_dihedral_residual_sum +
        esg.ncs_dihedral_residual_sum +
        esg.den_residual_sum +
        esg.ramachandran_residual_sum)
    assert approx_equal(esg.target, residual_total)
    # Instances of this class are accumulated, so drop the references to the
    # energies objects as soon as they are no longer needed.
    del energies_sites, esg
if data[key][0] is None: print key, "None" else: print key, data[key][0][0], "(only first shown)" elif key == "DATA": print key, "len=%d max=%f min=%f dimensions=%s" % ( data[key].size(), flex.max(data[key]), flex.min( data[key]), str(data[key].focus())) elif key == "WAVELENGTH": print "WAVELENGTH", data[ key], ", converted to eV:", 12398.4187 / data[key] elif key == "fuller_kapton_absorption_correction": print key, data[key] if doplots: c = data[key][0] hist = flex.histogram(c, n_slots=30) from matplotlib import pyplot as plt plt.scatter(hist.slot_centers(), hist.slots()) plt.show() obs = data['observations'][0] preds = data['mapped_predictions'][0] p1 = preds.select(c == 1.0) p2 = preds.select((c != 1.0) & (c <= 1.5)) plt.scatter(preds.parts()[1], preds.parts()[0], c='g') plt.scatter(p1.parts()[1], p1.parts()[0], c='b') plt.scatter(p2.parts()[1], p2.parts()[0], c='r') plt.show() else: print key, data[key]
def hist(data):
    """Return the text rendering of a 10-slot histogram of ``data``.

    The histogram is printed into an in-memory buffer with a one-space
    prefix and cutoffs formatted as ``%8.2f``; the buffer content is
    returned as a list of lines without line endings.
    """
    # cStringIO exists only on Python 2; fall back to io.StringIO elsewhere
    # so the helper does not fail at import time on Python 3.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO
    sio = StringIO()
    flex.histogram(data=data, n_slots=10) \
        .show(f=sio, prefix=" ", format_cutoffs="%8.2f")
    return sio.getvalue().splitlines()
def keywise_printout(data): for key in data: if key == 'ACTIVE_AREAS': print int(len(data[key])/4), "active areas, first one: ", list(data[key][0:4]) elif key == 'observations': print key, data[key], "Showing unit cell/spacegroup:" obs = data[key][0] uc = obs.unit_cell() uc.show_parameters() obs.space_group().info().show_summary() d = uc.d(obs.indices()) print "Number of observations:", len(obs.indices()) print "Max resolution: %f"%flex.min(d) print "Mean I/sigma:", flex.mean(obs.data())/flex.mean(obs.sigmas()) print "I/sigma > 1 count:", (obs.data()/obs.sigmas() > 1).count(True) print "I <= 0:", len(obs.data().select(obs.data() <= 0)) from cctbx.crystal import symmetry sym = symmetry(unit_cell = uc, space_group = obs.space_group()) mset = sym.miller_set(indices = obs.indices(), anomalous_flag=False) binner = mset.setup_binner(n_bins=20) acceptable_resolution_bins = [] binned_avg_i_sigi = [] for i in binner.range_used(): d_max, d_min = binner.bin_d_range(i) sel = (d <= d_max) & (d > d_min) sel &= obs.data() > 0 intensities = obs.data().select(sel) sigmas = obs.sigmas().select(sel) n_refls = len(intensities) avg_i = flex.mean(intensities) if n_refls > 0 else 0 avg_i_sigi = flex.mean(intensities / sigmas) if n_refls > 0 else 0 acceptable_resolution_bins.append(avg_i_sigi >= 1.0) acceptable_resolution_bins = [acceptable_resolution_bins[i] if False not in acceptable_resolution_bins[:i+1] else False for i in range(len(acceptable_resolution_bins))] best_res = None for i, ok in zip(binner.range_used(), acceptable_resolution_bins): d_max, d_min = binner.bin_d_range(i) if ok: best_res = d_min else: break if best_res is None: print "Highest resolution with I/sigI >= 1.0: None" else: print "Highest resolution with I/sigI >= 1.0: %f"%d_min elif key == 'mapped_predictions': print key, data[key][0][0], "(only first shown of %d)"%len(data[key][0]) elif key == 'correction_vectors' and data[key] is not None and data[key][0] is not None: if data[key][0] is None: print key, "None" 
else: print key, data[key][0][0], "(only first shown)" elif key == "DATA": print key,"len=%d max=%f min=%f dimensions=%s"%(data[key].size(),flex.max(data[key]),flex.min(data[key]),str(data[key].focus())) elif key == "WAVELENGTH": print "WAVELENGTH", data[key], ", converted to eV:", 12398.4187/data[key] elif key == "fuller_kapton_absorption_correction": print key, data[key] if doplots: c = data[key][0] hist = flex.histogram(c, n_slots=30) from matplotlib import pyplot as plt plt.scatter(hist.slot_centers(), hist.slots()) plt.show() obs = data['observations'][0] preds = data['mapped_predictions'][0] p1 = preds.select(c == 1.0) p2 = preds.select((c != 1.0) & (c <= 1.5)) plt.scatter(preds.parts()[1], preds.parts()[0], c='g') plt.scatter(p1.parts()[1], p1.parts()[0], c='b') plt.scatter(p2.parts()[1], p2.parts()[0], c='r') plt.show() else: print key, data[key]
def model_based_outliers(self, f_model, level=.01, return_data=False, plot_out=None):
    """Flag observations that are unlikely given the model amplitudes.

    Sigma-A values are estimated from ``self.miller_obs``, ``f_model`` and
    ``self.r_free_flags``.  A likelihood-ratio outlier test is run on the
    normalised amplitudes and its standardised likelihood is converted to
    p-values.  A report, including a 15-slot histogram of the scaled LL-gain
    values, is printed to ``self.out``.

    f_model: calculated amplitudes, passed as ``miller_calc``.
    level: p-value threshold; an observation is kept when p > level.
    return_data: when false the boolean flag array is returned, otherwise
      ``self.miller_obs`` restricted to the kept observations.
    plot_out: forwarded to ``self.make_log_model``.

    ``self.r_free_flags`` must not be None; if no flag is True the flags
    are inverted and stored back on the instance.
    """
    assert self.r_free_flags is not None
    if self.r_free_flags.data().count(True) == 0:
        flipped = ~self.r_free_flags.data()
        self.r_free_flags = self.r_free_flags.array(data=flipped)

    sa_est = sigmaa_estimation.sigmaa_estimator(
        miller_obs=self.miller_obs,
        miller_calc=f_model,
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=200,
        n_sampling_points=20,
        n_chebyshev_terms=13)
    sa_est.show(out=self.out)
    sigmaa = sa_est.sigmaa()
    obs_norm = abs(sa_est.normalized_obs)
    calc_norm = sa_est.normalized_calc

    # The amplitudes are already normalised, hence every epsilon is 1.
    ones = flex.double(calc_norm.data().size(), 1.0)
    lr_test = scaling.likelihood_ratio_outlier_test(
        f_obs=obs_norm.data(),
        sigma_obs=None,
        f_calc=calc_norm.data(),
        epsilon=ones,
        centric=obs_norm.centric_flags().data(),
        alpha=sigmaa.data(),
        beta=1.0 - sigmaa.data() * sigmaa.data())
    # The next four results are not used further in this method.
    modes = lr_test.posterior_mode()
    lik = lr_test.log_likelihood()
    p_lik = lr_test.posterior_mode_log_likelihood()
    s_der = lr_test.posterior_mode_snd_der()
    ll_gain = lr_test.standardized_likelihood()

    # The gain should never be below zero, but rounding can push it slightly
    # negative; clamp such entries to a tiny positive value before the sqrt.
    # NOTE(review): the clamp is applied to the ll_gain array itself, so if
    # set_selected works in place ll_gain is clamped too - confirm.
    eps = 1.0e-10
    below_eps = flex.bool(ll_gain < eps)
    floored = ll_gain.set_selected(below_eps, eps)
    per_obs_p = erf(flex.sqrt(floored / 2.0))
    p_values = 1.0 - flex.pow(per_obs_p, float(per_obs_p.size()))

    # Keep the observations whose p-value exceeds the threshold.
    flags = self.miller_obs.customized_copy(data=flex.bool(p_values > level))
    ll_gain = self.miller_obs.customized_copy(data=ll_gain)
    p_values = self.miller_obs.customized_copy(data=p_values)

    header_text = (
        " Model based outlier rejection. \n"
        "------------------------------ Calculated amplitudes and estimated values of"
        " alpha and beta are used to compute the log-likelihood of the observed"
        " amplitude. The method is inspired by Read, Acta Cryst. (1999). D55,"
        " 1759-1764. Outliers are rejected on the basis of the assumption that a"
        " scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is"
        " distributed according to a Chi-square distribution (Q\" is equal to the"
        " second derivative of the log likelihood function of the mode of the"
        " distribution). The outlier threshold of the p-value relates to the p-value"
        " of the extreme value distribution of the chi-square distribution. "
    )

    flags.map_to_asu()
    ll_gain.map_to_asu()
    p_values.map_to_asu()
    assert flags.indices().all_eq(self.miller_obs.indices())
    assert ll_gain.indices().all_eq(self.miller_obs.indices())
    assert p_values.indices().all_eq(self.miller_obs.indices())

    log_message = self.make_log_model(
        header_text, flags, ll_gain, p_values,
        obs_norm, calc_norm, sigmaa, plot_out)

    # Assemble the whole report in memory, then emit it in one go.
    tmp_log = StringIO()
    print >> tmp_log, log_message
    print >> tmp_log
    print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
    print >> tmp_log, " Note: scaled (LL-gain) is approximately Chi-square distributed."
    print >> tmp_log
    print >> tmp_log, " scaled(LL-gain) Frequency"
    gain_histogram = flex.histogram(ll_gain.data(), 15)
    gain_histogram.show(f=tmp_log, format_cutoffs='%7.3f')
    print >> self.out, tmp_log.getvalue()

    if return_data:
        assert flags.indices().all_eq(self.miller_obs.indices())
        return self.miller_obs.select(flags.data())
    return flags