def plot_histograms(self, reflections, panel = None, ax = None, bounds = None):
    """Draw the residual-norm histogram plus five marker lines onto ``ax``.

    A 20-slot histogram of ``reflections['difference_vector_norms']`` is built
    (restricted to values <= ``self.params.residuals.histogram_max`` when that
    parameter is set). Vertical lines are added at the observed mean, the
    Rayleigh mean, the histogram mode, the observed RMSD and the Rayleigh
    RMSD, where the Rayleigh values use the histogram mode as sigma. All
    points are passed through ``self.get_bounded_data(data, bounds)`` before
    plotting. A legend is added only if ``ax`` does not already have one.

    ``panel`` is accepted but not used by this method.
    """
    norms = reflections['difference_vector_norms']
    line_styles = ['b-', 'g-', 'g--', 'r-', 'b-', 'b--']
    slot_count = 20
    if self.params.residuals.histogram_max is None:
        h = flex.histogram(norms, n_slots=slot_count)
    else:
        kept = norms.select(norms <= self.params.residuals.histogram_max)
        h = flex.histogram(kept, n_slots=slot_count)

    n_refl = len(reflections)
    deltas = reflections['xyzcal.mm'] - reflections['xyzobs.mm.value']
    rmsd_obs = math.sqrt(deltas.sum_sq() / n_refl)

    # The centre of the most populated slot doubles as the Rayleigh sigma.
    peak = flex.max(h.slots())
    mode = h.slot_centers()[list(h.slots()).index(peak)]
    sigma = mode
    mean_obs = flex.mean(norms)
    median = flex.median(norms)  # evaluated as before; not plotted
    mean_rayleigh = math.sqrt(math.pi / 2) * sigma
    rmsd_rayleigh = math.sqrt(2) * sigma

    # Pack the histogram curve and the marker lines into one array so they
    # can all be bounded with a single get_bounded_data call.
    points = flex.vec2_double(list(zip(h.slot_centers(), h.slots())))
    n_curve = len(points)
    for position in (mean_obs, mean_rayleigh, mode, rmsd_obs, rmsd_rayleigh):
        points.extend(flex.vec2_double([(position, 0), (position, peak)]))
    points = self.get_bounded_data(points, bounds)

    # Split back into the curve followed by consecutive two-point segments.
    segments = [points[:n_curve]]
    for k in range(len(line_styles)):
        start = n_curve + 2 * k
        segments.append(points[start:start + 2])

    for segment, style in zip(segments, line_styles):
        ax.plot(segment.parts()[0], segment.parts()[1], style)

    if ax.get_legend() is None:
        ax.legend([r"$\Delta$XY", "MeanObs", "MeanRayl", "Mode", "RMSDObs", "RMSDRayl"])
def histogram(self, reflections, title):
    """Print residual statistics and plot the residual-norm histogram.

    Builds a 100-slot histogram of ``reflections['difference_vector_norms']``
    (restricted to values <= ``self.params.residuals.histogram_max`` when
    set), prints RMSD / mode / mean / median / Rayleigh estimates and the
    radial and transverse RMSDs (each multiplied by 1000 and labelled as
    microns), and draws the histogram with marker lines on a new figure.

    :param reflections: table providing 'difference_vector_norms',
        'xyzcal.mm', 'xyzobs.mm.value', 'radial_displacements' and
        'transverse_displacements' columns.
    :param title: title of the created figure.
    """
    def report(label, value):
        # A single pre-formatted argument prints the same text as the former
        # Python 2 statement ``print label, value * 1000`` under both
        # Python 2 and Python 3.
        print("%s %s" % (label, value * 1000))

    data = reflections['difference_vector_norms']
    n_slots = 100
    histogram_max = self.params.residuals.histogram_max
    if histogram_max is None:
        h = flex.histogram(data, n_slots=n_slots)
    else:
        h = flex.histogram(data.select(data <= histogram_max), n_slots=n_slots)

    n = len(reflections)
    rmsd = math.sqrt((reflections['xyzcal.mm']-reflections['xyzobs.mm.value']).sum_sq()/n)
    peak = flex.max(h.slots())
    # The centre of the most populated slot is used as the Rayleigh sigma.
    sigma = mode = h.slot_centers()[list(h.slots()).index(peak)]
    mean = flex.mean(data)
    median = flex.median(data)
    report("RMSD (microns)", rmsd)
    report("Histogram mode (microns):", mode)
    report("Overall mean (microns):", mean)
    report("Overall median (microns):", median)
    mean2 = math.sqrt(math.pi/2)*sigma
    rmsd2 = math.sqrt(2)*sigma
    report("Rayleigh Mean (microns)", mean2)
    report("Rayleigh RMSD (microns)", rmsd2)

    r = reflections['radial_displacements']
    t = reflections['transverse_displacements']
    report("Overall radial RMSD (microns)", math.sqrt(flex.sum_sq(r)/len(r)))
    report("Overall transverse RMSD (microns)", math.sqrt(flex.sum_sq(t)/len(t)))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')

    if self.params.residuals.histogram_xmax is not None:
        ax.set_xlim((0, self.params.residuals.histogram_xmax))
    if self.params.residuals.histogram_ymax is not None:
        ax.set_ylim((0, self.params.residuals.histogram_ymax))
    plt.title(title)

    # Marker lines, in the same order as the legend entries below.
    ax.plot((mean, mean), (0, peak), 'g-')
    ax.plot((mean2, mean2), (0, peak), 'g--')
    ax.plot((mode, mode), (0, peak), 'r-')
    ax.plot((rmsd, rmsd), (0, peak), 'b-')
    ax.plot((rmsd2, rmsd2), (0, peak), 'b--')

    ax.legend([r"$\Delta$XY", "MeanObs", "MeanRayl", "Mode", "RMSDObs", "RMSDRayl"])
    ax.set_xlabel("(mm)")
    ax.set_ylabel("Count")
def plot_unitcells(self, experiments):
    """Plot histograms of the a, b and c cell lengths of all crystals.

    Nothing is plotted when fewer than two experiments are given, because
    the sample standard deviation used for the axis limits needs at least
    two values. For each axis the histogram (50 slots) only shows crystals
    within max(4 sigma, 0.5) of the mean; the title reports how many were
    kept.

    :param experiments: object supporting ``len()`` and ``.crystals()``.
    """
    if len(experiments) < 2:
        return
    all_a = flex.double()
    all_b = flex.double()
    all_c = flex.double()
    for crystal in experiments.crystals():
        a, b, c = crystal.get_unit_cell().parameters()[0:3]
        all_a.append(a)
        all_b.append(b)
        all_c.append(c)

    fig, axes = plt.subplots(nrows=3, ncols=1)
    for ax, axis, data in zip(axes, ['A', 'B', 'C'], [all_a, all_b, all_c]):
        stats = flex.mean_and_variance(data)
        stddev = stats.unweighted_sample_standard_deviation()
        # Show at least +/- 0.5 around the mean, even for very tight
        # distributions.
        cutoff = max(4*stddev, 0.5)
        limits = stats.mean()-cutoff, stats.mean()+cutoff
        sel = (data >= limits[0]) & (data <= limits[1])
        subset = data.select(sel)
        h = flex.histogram(subset, n_slots=50)
        ax.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')
        ax.set_title("%s axis histogram (showing %d of %d xtals). Mean: %7.2f Stddev: %7.2f"%(
            axis, len(subset), len(data), stats.mean(), stddev))
        ax.set_ylabel("N lattices")
        ax.set_xlabel(r"$\AA$")
        ax.set_xlim(limits)
    plt.tight_layout()
def plot_cdf_manually(self, reflections, panel = None, ax = None, bounds = None):
    """Plot the observed residual CDF against a Rayleigh CDF.

    The residual norms |xyzcal.mm - xyzobs.mm.value| are sorted and scaled
    by their maximum to form the observed curve; the calculated curve is
    ``1 - exp(-x**2 / (2 sigma**2))`` sampled at 100 points in [0, 1), with
    sigma taken as the mode of the residual histogram.

    :param reflections: table with 'xyzcal.mm' and 'xyzobs.mm.value'.
    :param panel: accepted but not used by this method.
    :param ax: axes to draw on; a new figure is created when None.
    :param bounds: when given, both curves are passed through
        ``self.get_bounded_data``; when None, limits and title are set
        directly on the axes.
    """
    colors = ['blue', 'green']
    r = (reflections['xyzcal.mm']-reflections['xyzobs.mm.value']).norms()
    h = flex.histogram(r)
    # The centre of the most populated slot (the mode) serves as sigma.
    sigma = h.slot_centers()[list(h.slots()).index(flex.max(h.slots()))]

    x_extent = max(r)
    y_extent = len(r)
    xobs = [i/x_extent for i in sorted(r)]
    # float() keeps these fractional even where "/" is integer division
    # (Python 2 without "from __future__ import division").
    yobs = [float(i)/y_extent for i in range(y_extent)]
    obs = list(zip(xobs, yobs))

    ncalc = 100
    xcalc = [float(i)/ncalc for i in range(ncalc)]
    ycalc = [1-math.exp((-i**2)/(2*(sigma**2))) for i in xcalc]
    calc = list(zip(xcalc, ycalc))

    data = [flex.vec2_double(obs), flex.vec2_double(calc)]

    # Create the axes before first use; previously the ax=None default
    # failed on ax.set_xlim() whenever bounds was None.
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    if bounds is None:
        ax.set_xlim((-1,1))
        ax.set_ylim((-1,1))
        ax.set_title("%s Outlier SP Manually"%self.params.tag)
    else:
        data = [self.get_bounded_data(d, bounds) for d in data]

    for subset, c in zip(data, colors):
        ax.plot(subset.parts()[0], subset.parts()[1], '-', c=c)
def plot_difference_vector_norms_histograms(self, reflections, panel = None, ax = None, bounds = None):
    """Plot a histogram of the residual norms, scaled by 1000.

    The 'difference_vector_norms' column is multiplied by 1000 and binned
    into 50 slots over the fixed range 0-100.

    :param reflections: table with a 'difference_vector_norms' column.
    :param panel: accepted but not used by this method.
    :param ax: axes to draw on; a new figure is created when None.
    :param bounds: when given, the histogram points are passed through
        ``self.get_bounded_data``; when None, the raw histogram is plotted
        and a title is set.
    """
    r = reflections['difference_vector_norms']*1000
    h = flex.histogram(r, n_slots=50, data_min=0, data_max=100)

    # Create the axes before first use; previously the ax=None default
    # failed on ax.set_title() whenever bounds was None.
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    if bounds is None:
        x = h.slot_centers().as_numpy_array()
        y = h.slots().as_numpy_array()
        ax.set_title("%s Residual norms histogram"%self.params.tag)
    else:
        d = flex.vec2_double(h.slot_centers(), h.slots().as_double())
        data = self.get_bounded_data(d, bounds)
        x, y = data.parts()

    ax.plot(x, y, '-', c='blue')
def blank_counts_analysis(reflections, scan, phi_step, fractional_loss):
    """Histogram reflection counts along z and flag potentially blank steps.

    ``phi_step`` is rounded up to a whole number of images. The observed z
    pixel coordinates are binned over the scan's array range, and a slot is
    flagged as potentially blank when its count, relative to the fullest
    slot, is <= ``fractional_loss``.

    :returns: a dict with plot "data" and "layout" entries plus a
        "blank_regions" entry computed by ``blank_regions_from_sel``.
    :raises ValueError: if ``reflections`` is empty.
    """
    if len(reflections) == 0:
        raise ValueError("Input contains no reflections")

    z_px = reflections["xyzobs.px.value"].parts()[2]
    phi = scan.get_angle_from_array_index(z_px)

    # Snap the requested step to a whole number of images.
    osc = scan.get_oscillation()[1]
    phi_step = iceil(phi_step / osc) * osc

    first_index, last_index = scan.get_array_range()[0], scan.get_array_range()[1]
    phi_min = scan.get_angle_from_array_index(first_index)
    phi_max = scan.get_angle_from_array_index(last_index)
    assert phi_min <= flex.min(phi)
    assert phi_max >= flex.max(phi)

    n_steps = max(int(round((phi_max - phi_min) / phi_step)), 1)
    hist = flex.histogram(
        z_px, data_min=first_index, data_max=last_index, n_slots=n_steps
    )
    logger.debug("Histogram:")
    logger.debug(hist.as_str())

    counts = hist.slots()
    relative_counts = counts.as_double() / flex.max(counts)
    potential_blank_sel = relative_counts <= fractional_loss

    slot_infos = list(hist.slot_infos())
    lower_edges = tuple(info.low_cutoff for info in slot_infos)
    upper_edges = tuple(info.high_cutoff for info in slot_infos)

    trace = {
        "x": list(hist.slot_centers()),
        "y": list(hist.slots()),
        "xlow": lower_edges,
        "xhigh": upper_edges,
        "blank": list(potential_blank_sel),
        "type": "bar",
        "name": "blank_counts_analysis",
    }
    layout = {
        "xaxis": {"title": "z observed (images)"},
        "yaxis": {"title": "Number of reflections"},
        "bargap": 0,
    }
    result = {"data": [trace], "layout": layout}
    result["blank_regions"] = blank_regions_from_sel(result["data"][0])
    return result
def plot_outliers(data):
    """Build plot dictionaries for outlier positions and their z distribution.

    :param data: mapping with "x", "y", "z" sequences and an "image_size"
        pair used for the axis ranges.
    :returns: a dict with "outlier_xy_positions" (scatter of x against y)
        and "outliers_vs_z" (bar histogram of z); both are empty dicts when
        ``data["z"]`` is empty.
    """
    z_values = data["z"]
    if not z_values:
        return {"outlier_xy_positions": {}, "outliers_vs_z": {}}

    # Ten slots per outlier, capped at 100.
    slot_count = min(100, int(len(z_values) * 10))
    hist = flex.histogram(flex.double(z_values), n_slots=slot_count)

    xy_plot = {
        "data": [
            {
                "x": data["x"],
                "y": data["y"],
                "type": "scatter",
                "mode": "markers",
                "xaxis": "x",
                "yaxis": "y",
            }
        ],
        "layout": {
            "title": "Outlier x-y positions",
            "xaxis": {
                "anchor": "y",
                "title": "x (px)",
                "range": [0, data["image_size"][0]],
            },
            "yaxis": {
                "anchor": "x",
                "title": "y (px)",
                "range": [0, data["image_size"][1]],
            },
        },
    }
    z_plot = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(hist.slots()),
                "type": "bar",
                "name": "outliers vs rotation",
            }
        ],
        "layout": {
            "title": "Outlier distribution across frames",
            "xaxis": {"title": "frame"},
            "yaxis": {"title": "count"},
            "bargap": 0,
        },
    }
    return {"outlier_xy_positions": xy_plot, "outliers_vs_z": z_plot}
def histogram(params, images):
    """Accumulate per-channel pixel histograms over images and write them out.

    Each image is loaded with ``load_raw_image`` into three channels; each
    channel is binned into 4096 slots over 0-65535 and merged into a running
    total. The totals are written to ``params.output`` as one line per slot:
    slot centre followed by the three channel counts.
    """
    from astrotbx.input_output.loader import load_raw_image
    from dials.array_family import flex

    def bin_channel(channel):
        return flex.histogram(channel.as_1d(), data_min=0, data_max=65535, n_slots=4096)

    # Running totals for the three channels, in load_raw_image order.
    totals = [None, None, None]

    for image in images:
        r, g, b = load_raw_image(image, params=params.raw)
        per_image = [bin_channel(r), bin_channel(g), bin_channel(b)]
        for index, current in enumerate(per_image):
            if totals[index] is None:
                totals[index] = current
            else:
                totals[index].update(current)

    hr, hg, hb = totals
    with open(params.output, 'w') as f:
        rows = zip(hr.slot_centers(), hr.slots(), hg.slots(), hb.slots())
        for row in rows:
            f.write('%.2f %d %d %d\n' % row)
def run(self, flags, sequence=None, observations=None, **kwargs):
    """Unflag observations that fall in unusually dense detector regions.

    The x/y pixel centroids are binned into a 2D histogram with
    ``self.nbins`` bins. A 1D histogram of the per-bin counts is
    accumulated into a normalised cumulative distribution, and the density
    cutoff is set at the first point where two consecutive gradients of
    that distribution are below ``self.gradient_cutoff``. Observations
    strictly inside any 2D bin whose count exceeds the cutoff are set to
    False in ``flags``.

    :param flags: selection array, modified in place and returned.
    :param sequence: accepted but not used by this method.
    :param observations: object providing
        ``centroids().px_position_xy().parts()``.
    :returns: ``flags``. It is returned unchanged when there are no counts
        to analyse or when no cutoff is found.
    """
    import numpy as np

    obs_x, obs_y = observations.centroids().px_position_xy().parts()

    H, xedges, yedges = np.histogram2d(
        obs_x.as_numpy_array(), obs_y.as_numpy_array(), bins=self.nbins
    )

    H_flex = flex.double(H.flatten().astype(np.float64))
    n_slots = min(int(flex.max(H_flex)), 30)
    if n_slots < 1:
        # No 2D bin contains an observation, so there is nothing to filter
        # and a zero-slot histogram cannot be built.
        return flags
    hist = flex.histogram(H_flex, n_slots=n_slots)

    slots = hist.slots()
    cumulative_hist = flex.long(len(slots))
    running_total = 0
    for i, slot in enumerate(slots):
        running_total += slot
        cumulative_hist[i] = running_total

    cumulative_hist = cumulative_hist.as_double() / flex.max(
        cumulative_hist.as_double()
    )

    slot_width = hist.slot_width()
    slot_centers = hist.slot_centers()
    cutoff = None
    gradients = flex.double()
    for i in range(len(slots) - 1):
        g = (cumulative_hist[i + 1] - cumulative_hist[i]) / slot_width
        gradients.append(g)
        if (
            cutoff is None
            and i > 0
            and g < self.gradient_cutoff
            and gradients[i - 1] < self.gradient_cutoff
        ):
            cutoff = slot_centers[i - 1] - 0.5 * slot_width

    if cutoff is None:
        # The cumulative distribution never flattened out. Comparing H with
        # None raises TypeError on Python 3 (and selects every bin on
        # Python 2), so leave the flags untouched instead.
        return flags

    sel = np.column_stack(np.where(H > cutoff))
    for (ix, iy) in sel:
        flags.set_selected(
            (
                (obs_x > xedges[ix])
                & (obs_x < xedges[ix + 1])
                & (obs_y > yedges[iy])
                & (obs_y < yedges[iy + 1])
            ),
            False,
        )

    return flags
def histogram(self, data):
    """Write a 100-slot text histogram of ``data`` and its size to the main log.

    :param data: array accepted by ``flex.histogram`` that provides
        ``.size()``.
    """
    nslots = 100
    histogram = flex.histogram(data=data, n_slots=nslots)
    out = StringIO()
    histogram.show(f=out, prefix=" ", format_cutoffs="%6.2f")
    self.logger.main_log(out.getvalue() + '\n' + "Total: %d"%data.size() + '\n')

    # Debugging aid, disabled by default. matplotlib is imported only inside
    # this branch so that logging the histogram does not require it.
    show_plot = False
    if show_plot:
        from matplotlib import pyplot as plt
        fig = plt.figure()
        plt.bar(histogram.slot_centers(), histogram.slots(), align="center", width=histogram.slot_width())
        plt.show()
def _show_each(edges):
    """Log range, mean, reference value and a text histogram for each edge.

    Uses ``ref_edges``, ``labels``, ``n_slots`` and ``self`` from the
    enclosing scope; ``edges`` is iterated in step with ``ref_edges`` and
    ``labels``.
    """
    log = self.logger.main_log
    for values, reference, name in zip(edges, ref_edges, labels):
        edge_hist = flex.histogram(values, n_slots=n_slots)
        lowest = flex.min(values)
        highest = flex.max(values)
        stats = flex.mean_and_variance(values)

        log(" %s edge"%name)
        log(" range: %6.2f - %.2f"%(lowest, highest))
        log(" mean: %6.2f +/- %6.2f on N = %d"
            %(stats.mean(), stats.unweighted_sample_standard_deviation(), values.size()))
        log(" reference: %6.2f"%reference)

        buffer = StringIO()
        edge_hist.show(f=buffer, prefix=" ", format_cutoffs="%6.2f")
        log(buffer.getvalue() + '\n')
def run(args):
    """Count, per pixel, in how many input images the pixel is flagged hot.

    Each unhandled command line argument is loaded as an image (files with
    an 'arw' extension through the raw loader), passed through ``hot`` and
    summed as integers. A histogram of the per-pixel totals is printed, and
    the totals are pickled to ``params.output`` when that is set.
    """
    from dials.util.options import OptionParser
    import libtbx.load_env

    usage = "%s [options] *ARW" % (libtbx.env.dispatcher_name)
    parser = OptionParser(usage=usage, phil=phil_scope)
    params, options, args = parser.parse_args(show_diff_phil=True, return_unhandled=True)

    from astrotbx.input_output.loader import load_image_gs, load_raw_image_gs
    from astrotbx.algorithms.star_find import hot
    from dials.array_family import flex

    raws = ['arw']
    total = None
    n = 0

    for arg in args:
        n += 1
        exten = arg.split('.')[-1].lower()
        if exten in raws:
            image = load_raw_image_gs(arg, params.raw)
        else:
            image = load_image_gs(arg)
        signal = hot(image, params).as_1d()
        if total is None:
            total = signal.as_int()
        else:
            total += signal.as_int()

    # One slot per possible total 0..n, centred on the integer values.
    h_total = flex.histogram(total.as_double(), data_min=-0.5, data_max=n + 0.5, n_slots=n + 1)
    for c, v in zip(h_total.slot_centers(), h_total.slots()):
        print(c, v)

    total.reshape(flex.grid(*image.focus()))

    if params.output:
        try:
            import cPickle as pickle  # Python 2
        except ImportError:
            import pickle
        # HIGHEST_PROTOCOL is a binary format, so the file must be opened
        # in binary mode.
        with open(params.output, 'wb') as fout:
            pickle.dump(total, fout, protocol=pickle.HIGHEST_PROTOCOL)
def gain(image, params):
    """Return a histogram of the 3x3 index of dispersion of an image.

    :param image: image file name; files with an 'arw' extension are read
        with the raw loader using ``params.raw``, anything else with the
        generic loader.
    :param params: provides ``raw``, ``min``, ``max`` and ``slots``.
    :returns: ``flex.histogram`` of the index-of-dispersion values over
        [``params.min``, ``params.max``] with ``params.slots`` slots.
    """
    # Aliased so the module does not shadow the builtin filter().
    from dials.algorithms.image import filter as image_filter
    from astrotbx.input_output.loader import load_image_gs, load_raw_image_gs
    from dials.array_family import flex

    raws = ['arw']

    exten = image.split('.')[-1].lower()
    if exten in raws:
        pixels = load_raw_image_gs(image, params.raw)
    else:
        pixels = load_image_gs(image)

    disp = image_filter.index_of_dispersion_filter(pixels, (3, 3)).index_of_dispersion()
    return flex.histogram(disp.as_1d(), data_min=params.min, data_max=params.max, n_slots=params.slots)
def image_rmsd_histogram(self, reflections, tag):
    """Plot a histogram and a boxplot of the per-id residual RMSDs.

    For each distinct value of ``reflections['id']`` the RMSD of
    'difference_vector_norms' is computed and multiplied by 1000 (the axes
    are labelled in microns). Two new figures are created: a 40-slot
    histogram and a horizontal boxplot.

    :param reflections: table with 'id' and 'difference_vector_norms'.
    :param tag: prefix for both figure titles.
    """
    data = flex.double()
    for i in set(reflections['id']):
        refls = reflections.select(reflections['id']==i)
        if len(refls) == 0:
            continue
        rmsd = math.sqrt(flex.sum_sq(refls['difference_vector_norms'])/len(refls))
        data.append(rmsd)
    data *= 1000

    h = flex.histogram(data, n_slots=40)
    fig = plt.figure()
    # The subplot position must be an integer; current matplotlib rejects
    # the string form '111'.
    ax = fig.add_subplot(111)
    ax.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')
    plt.title("%sHistogram of image RMSDs"%tag)
    ax.set_xlabel("RMSD (microns)")
    ax.set_ylabel("Count")

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.boxplot(data, vert=False)
    plt.title("%sBoxplot of image RMSDs"%tag)
    ax.set_xlabel("RMSD (microns)")
def cell_hist():
    """Print histograms of the six unit cell parameters of all crystals.

    Every command line argument is loaded with ``load_experiment``. Each
    unit cell parameter is binned into 1000 slots over 0-100 and one line
    is printed per slot: the slot centre followed by the six counts in
    parameter order.
    """
    from dials.array_family import flex
    import sys

    n_params = 6
    columns = [flex.double() for _ in range(n_params)]

    for path in sys.argv[1:]:
        expt = load_experiment(path)
        for xtal in expt.crystals():
            cell = xtal.get_unit_cell().parameters()
            for k in range(n_params):
                columns[k].append(cell[k])

    hists = [
        flex.histogram(column, data_min=0, data_max=100, n_slots=1000)
        for column in columns
    ]

    # All histograms share the same binning, so the first one supplies the
    # slot centres for every row.
    centers = hists[0].slot_centers()
    counts = [hist.slots() for hist in hists]
    for row in zip(centers, *counts):
        print("%5.2f %5d %5d %5d %5d %5d %5d" % row)
def blank_integrated_analysis(reflections, scan, phi_step, fractional_loss):
    """Flag potentially blank steps from the mean I/sigma(I) along z.

    Profile-fitted intensities are used when any reflection has the
    ``integrated_prf`` flag, otherwise summation intensities. ``phi_step``
    is rounded up to a whole number of images. The observed z pixel
    coordinates are binned over the scan's array range, and a slot is
    flagged as potentially blank when its mean I/sigma(I) is <=
    ``fractional_loss`` times the largest slot mean.

    :returns: a dict with plot "data" and "layout" entries plus a
        "blank_regions" entry computed by ``blank_regions_from_sel``.
    """
    prf_sel = reflections.get_flags(reflections.flags.integrated_prf)
    if prf_sel.count(True) > 0:
        reflections = reflections.select(prf_sel)
        intensities = reflections["intensity.prf.value"]
        variances = reflections["intensity.prf.variance"]
    else:
        sum_sel = reflections.get_flags(reflections.flags.integrated_sum)
        reflections = reflections.select(sum_sel)
        intensities = reflections["intensity.sum.value"]
        variances = reflections["intensity.sum.variance"]

    i_sigi = intensities / flex.sqrt(variances)

    xyz_px = reflections["xyzobs.px.value"]
    x_px, y_px, z_px = xyz_px.parts()
    phi = scan.get_angle_from_array_index(z_px)

    # Snap the requested step to a whole number of images.
    osc = scan.get_oscillation()[1]
    n_images_per_step = iceil(phi_step / osc)
    phi_step = n_images_per_step * osc

    array_range = scan.get_array_range()
    phi_min = flex.min(phi)
    phi_max = flex.max(phi)
    # Always use at least one slot: the observed phi range can round to zero
    # steps. This matches the clamping in blank_counts_analysis.
    n_steps = max(int(round((phi_max - phi_min) / phi_step)), 1)
    hist = flex.histogram(
        z_px, data_min=array_range[0], data_max=array_range[1], n_slots=n_steps
    )
    logger.debug("Histogram:")
    logger.debug(hist.as_str())

    mean_i_sigi = flex.double()
    for slot_info in hist.slot_infos():
        sel = (z_px >= slot_info.low_cutoff) & (z_px < slot_info.high_cutoff)
        if sel.count(True) == 0:
            mean_i_sigi.append(0)
        else:
            mean_i_sigi.append(flex.mean(i_sigi.select(sel)))

    potential_blank_sel = mean_i_sigi <= (fractional_loss * flex.max(mean_i_sigi))

    xmin, xmax = zip(
        *[
            (slot_info.low_cutoff, slot_info.high_cutoff)
            for slot_info in hist.slot_infos()
        ]
    )

    # NOTE(review): "y" holds mean I/sigma(I), yet the trace name and y-axis
    # title are the ones used for the counts plot. Left unchanged because
    # consumers may rely on these strings; confirm.
    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(mean_i_sigi),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {
            "xaxis": {"title": "z observed (images)"},
            "yaxis": {"title": "Number of reflections"},
            "bargap": 0,
        },
    }

    blank_regions = blank_regions_from_sel(d["data"][0])
    d["blank_regions"] = blank_regions

    return d
def get_histogram(d_star_sq, target_n_per_bin=20, max_slots=20, min_slots=5):
    """Histogram ``d_star_sq`` with a slot count derived from its length.

    The slot count is ``len(d_star_sq) // target_n_per_bin``, limited to at
    most ``max_slots`` and then raised to at least ``min_slots``.
    """
    wanted = len(d_star_sq) // target_n_per_bin
    slot_count = max(min(wanted, max_slots), min_slots)
    return flex.histogram(d_star_sq, n_slots=slot_count)
# Fill the symmetric matrix sij with a pairwise score between the groups of
# xy points selected by each value in zs, then pickle it and print a
# histogram of the scores.
# NOTE(review): this is a fragment; zs, z, xy, sij, pickle and flex come from
# code that is not visible here.
for z0 in zs:
    # Points whose z equals z0 + 0.5 (presumably the centre of frame z0 --
    # confirm against the caller).
    s0 = z == (z0 + 0.5)
    xy0 = xy.select(s0)
    n0 = xy0.size()
    # Groups with fewer than 5 points are skipped.
    if n0 < 5:
        continue
    from annlib_ext import AnnAdaptor as ann_adaptor

    # Nearest-neighbour search structure over the 2D points of group z0.
    ann = ann_adaptor(xy0.as_double().as_1d(), 2)
    for z1 in zs:
        # Only pairs with z1 < z0 are scored; this stops at the first
        # z1 >= z0, which assumes zs is in ascending order -- TODO confirm.
        if z1 >= z0:
            break
        s1 = z == (z1 + 0.5)
        xy1 = xy.select(s1)
        n1 = xy1.size()
        if n1 < 5:
            continue
        ann.query(xy1.as_double().as_1d())
        # presumably ann.distances holds squared distances, hence the sqrt
        d1 = flex.sqrt(ann.distances)
        # Number of query points whose value in d1 is below 5.0.
        m01 = (d1 < 5.0).count(True)
        # Matched count divided by the mean size of the two groups.
        s = m01 / (0.5 * (n0 + n1))
        sij[z0, z1] = s
        sij[z1, z0] = s
# NOTE(review): the file is opened in text mode and never explicitly closed.
pickle.dump(sij, open("sij.pickle", "w"))
# Histogram of all matrix entries in 100 slots over [0, 1].
hij = flex.histogram(sij.as_1d(), data_min=0, data_max=1, n_slots=100)
for _c, _s in zip(hij.slot_centers(), hij.slots()):
    print(_c, _s)
def run(args):
    """Compare the intensities in two reflection files and save plots.

    Exactly two file names are expected after option parsing. Each is read
    with ``parse_best_hkl`` into a miller array using the given (or a
    default) space group and unit cell. Histograms of I and I/sigma, binned
    mean I and I/sigma against resolution, and scatter plots of the matched
    I, sigma, variance and I/sigma values are written as PNG files in the
    current directory. Matching uses the derived-group operation without a
    translation part that gives the most common reflections.
    """
    import libtbx.load_env

    usage = "%s [options]" % libtbx.env.dispatcher_name
    parser = OptionParser(
        usage=usage, phil=phil_scope, check_format=False, epilog=help_message
    )
    params, options, args = parser.parse_args(
        show_diff_phil=True, return_unhandled=True
    )

    if params.space_group is None:
        space_group = sgtbx.space_group()
    else:
        space_group = params.space_group.group()

    unit_cell = params.unit_cell
    if unit_cell is None:
        unit_cell = space_group.info().any_compatible_unit_cell(volume=100000)
    print(unit_cell)

    assert len(args) == 2

    from cctbx import crystal, miller

    cs = crystal.symmetry(space_group=space_group, unit_cell=unit_cell)

    intensities = []
    for filename in args:
        hkl, i, sigi = parse_best_hkl(filename)
        array = miller.array(miller.set(cs, hkl), data=i, sigmas=sigi)
        array.set_observation_type_xray_intensity()
        intensities.append(array)

    from matplotlib import pyplot

    def save_histogram_pair(values1, values2, filename):
        # Two stacked 100-slot bar histograms sharing the x axis.
        hists = [flex.histogram(values, n_slots=100) for values in (values1, values2)]
        _, axarr = pyplot.subplots(2, sharex=True, figsize=(16, 12))
        for axis, hist in zip(axarr, hists):
            axis.bar(
                hist.slot_centers() - 0.5 * hist.slot_width(),
                hist.slots(),
                align="center",
                width=hist.slot_width(),
                color="black",
                edgecolor=None,
            )
        pyplot.savefig(filename)
        pyplot.clf()

    def save_binned_pair(binned1, binned2, filename):
        # Two stacked line plots of binned data, with the tick labels of the
        # current axes converted from d*^2 to d.
        _, axarr = pyplot.subplots(2, sharex=True, figsize=(16, 12))
        for axis, binned in zip(axarr, (binned1, binned2)):
            axis.plot(binned.binner.bin_centers(2), binned.data[1:-1])
        ax = pyplot.gca()
        xticks = ax.get_xticks()
        xticks_d = [
            "%.2f" % uctbx.d_star_sq_as_d(ds2) if ds2 > 0 else 0 for ds2 in xticks
        ]
        ax.set_xticklabels(xticks_d)
        pyplot.xlabel("d spacing (A)")
        pyplot.savefig(filename)
        pyplot.clf()

    def save_scatter(x, y, filename, axis_labels=None):
        # Scatter plot with a black diagonal from the origin to the larger
        # of the two upper axis limits.
        pyplot.scatter(x, y, marker="+", alpha=0.5)
        m = max(pyplot.xlim()[1], pyplot.ylim()[1])
        pyplot.plot((0, m), (0, m), c="black")
        if axis_labels is not None:
            pyplot.xlabel(axis_labels[0])
            pyplot.ylabel(axis_labels[1])
        pyplot.savefig(filename)
        pyplot.clf()

    first, second = intensities

    save_histogram_pair(first.data(), second.data(), "hist_intensities.png")
    save_histogram_pair(
        first.data() / first.sigmas(),
        second.data() / second.sigmas(),
        "hist_isigi.png",
    )

    print(first.d_max_min())
    print(second.d_max_min())
    first.setup_binner(n_bins=20)
    second.setup_binner(n_bins=20)

    save_binned_pair(
        first.mean(use_binning=True),
        second.mean(use_binning=True),
        "imean_vs_resolution.png",
    )
    save_binned_pair(
        first.i_over_sig_i(use_binning=True),
        second.i_over_sig_i(use_binning=True),
        "isigi_vs_resolution.png",
    )

    # Pick the operation that maximises the number of common reflections.
    best_cb_op = None
    best_count = 0
    derived_group = space_group.build_derived_reflection_intensity_group(False)
    for op in derived_group.all_ops():
        if not op.t().is_zero():
            continue
        cb_op = sgtbx.change_of_basis_op(op)
        lhs, rhs = intensities
        lhs, rhs = lhs.common_sets(rhs.change_basis(cb_op))
        if lhs.size() > best_count:
            best_cb_op = cb_op
            best_count = lhs.size()

    print("Best cb_op: %s (%i matches)" % (best_cb_op, best_count))

    lhs, rhs = intensities
    lhs, rhs = lhs.common_sets(rhs.change_basis(best_cb_op))

    save_scatter(
        lhs.data(),
        rhs.data(),
        "scatter_intensities.png",
        axis_labels=(args[0], args[1]),
    )
    save_scatter(lhs.sigmas(), rhs.sigmas(), "scatter_sigmas.png")
    save_scatter(
        flex.pow2(lhs.sigmas()), flex.pow2(rhs.sigmas()), "scatter_variances.png"
    )
    save_scatter(
        lhs.data() / lhs.sigmas(), rhs.data() / rhs.sigmas(), "scatter_i_sig_i.png"
    )
def run(args):
    """Estimate the distribution of predicted spot counts over orientations.

    A single imageset is imported from ``args``; the remaining arguments
    are interpreted as phil parameters, of which ``unit_cell`` and
    ``space_group`` are required. For ``params.n_samples`` orientations
    from ``random_rotation()`` the reflections are predicted (optionally
    limited to d > ``params.d_min``) and counted. Basic statistics, a
    20-slot histogram and the raw counts are printed; with ``params.plot``
    the histogram is also saved to ``predicted_count_histogram.pdf``.
    """
    from libtbx.phil import command_line
    from dials.util.command_line import Importer
    from dials.array_family import flex

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(args)
    importer = Importer(args, check_format=False)
    assert len(importer.datablocks) == 1
    sweeps = importer.datablocks[0].extract_imagesets()
    assert len(sweeps) == 1
    sweep = sweeps[0]

    cmd_line = command_line.argument_interpreter(master_params=master_phil_scope)
    working_phil = cmd_line.process_and_fetch(args=importer.unhandled_arguments)
    working_phil.show()

    params = working_phil.extract()
    assert params.unit_cell is not None
    assert params.space_group is not None
    unit_cell = params.unit_cell
    space_group = params.space_group.group()

    import random
    from dxtbx.model.crystal import crystal_model
    from cctbx import crystal, miller
    from scitbx import matrix

    flex.set_random_seed(params.random_seed)
    random.seed(params.random_seed)

    crystal_symmetry = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)

    # the reciprocal matrix
    B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

    def predict_once(args):
        # args is a 1-tuple holding the orientation matrix U.
        from dxtbx.model.experiment.experiment_list import Experiment

        U = args[0]
        A = U * B
        direct_matrix = A.inverse()
        cryst_model = crystal_model(direct_matrix[0:3],
                                    direct_matrix[3:6],
                                    direct_matrix[6:9],
                                    space_group=space_group)
        experiment = Experiment(imageset=sweep,
                                beam=sweep.get_beam(),
                                detector=sweep.get_detector(),
                                goniometer=sweep.get_goniometer(),
                                scan=sweep.get_scan(),
                                crystal=cryst_model)
        predicted_reflections = flex.reflection_table.from_predictions(experiment)
        miller_indices = predicted_reflections['miller_index']
        miller_set = miller.set(crystal_symmetry, miller_indices, anomalous_flag=True)
        if params.d_min is not None:
            resolution_sel = miller_set.d_spacings().data() > params.d_min
            predicted_reflections = predicted_reflections.select(resolution_sel)
        return len(predicted_reflections)

    from libtbx import easy_mp

    args = [(random_rotation(),) for i in range(params.n_samples)]
    results = easy_mp.parallel_map(
        func=predict_once,
        iterable=args,
        processes=params.nproc,
        preserve_order=True,
        preserve_exception_message=True)
    n_predicted = flex.double(results)

    print("Basic statistics:")
    from scitbx.math import basic_statistics
    stats = basic_statistics(n_predicted)
    stats.show()

    print("Histogram:")
    hist = flex.histogram(n_predicted, n_slots=20)
    hist.show()

    print("Raw spot counts:")
    print(list(n_predicted))

    if params.plot:
        from matplotlib import pyplot
        from matplotlib.backends.backend_pdf import PdfPages

        pyplot.rc('font', family='serif')
        pyplot.rc('font', serif='Times New Roman')

        blue = '#2166AC'
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
               color=blue, edgecolor=blue)
        ax.set_xlabel('Spot count')
        ax.set_ylabel('Frequency')
        pdf = PdfPages("predicted_count_histogram.pdf")
        pdf.savefig(fig)
        pdf.close()
def run(args):
    """Sample random orientations and report the predicted spot counts.

    Exactly one imageset must be importable from ``args``; the unhandled
    arguments are parsed as phil parameters and must define ``unit_cell``
    and ``space_group``. Reflections are predicted for ``params.n_samples``
    orientations from ``random_rotation()`` and counted, keeping only
    d > ``params.d_min`` when that is set. Statistics, a 20-slot histogram
    and the raw counts are printed; if ``params.plot`` is set the histogram
    is written to ``predicted_count_histogram.pdf``.
    """
    from libtbx.phil import command_line
    from dials.util.command_line import Importer
    from dials.array_family import flex

    print(args)
    imported = Importer(args, check_format=False)
    assert len(imported.datablocks) == 1
    imagesets = imported.datablocks[0].extract_imagesets()
    assert len(imagesets) == 1
    imageset = imagesets[0]

    interpreter = command_line.argument_interpreter(master_params=master_phil_scope)
    working_phil = interpreter.process_and_fetch(args=imported.unhandled_arguments)
    working_phil.show()

    params = working_phil.extract()
    assert params.unit_cell is not None
    assert params.space_group is not None
    cell = params.unit_cell
    group = params.space_group.group()

    import random
    from dxtbx.model.crystal import crystal_model
    from cctbx import crystal, miller
    from scitbx import matrix

    flex.set_random_seed(params.random_seed)
    random.seed(params.random_seed)

    symmetry = crystal.symmetry(unit_cell=cell, space_group=group)

    # the reciprocal matrix
    B = matrix.sqr(cell.fractionalization_matrix()).transpose()

    def predict_once(job):
        # job is a 1-tuple holding the orientation matrix U.
        from dxtbx.model.experiment.experiment_list import Experiment

        direct = (job[0] * B).inverse()
        model = crystal_model(
            direct[0:3],
            direct[3:6],
            direct[6:9],
            space_group=group,
        )
        experiment = Experiment(
            imageset=imageset,
            beam=imageset.get_beam(),
            detector=imageset.get_detector(),
            goniometer=imageset.get_goniometer(),
            scan=imageset.get_scan(),
            crystal=model,
        )
        predicted = flex.reflection_table.from_predictions(experiment)
        indices = miller.set(symmetry, predicted["miller_index"], anomalous_flag=True)
        if params.d_min is not None:
            predicted = predicted.select(indices.d_spacings().data() > params.d_min)
        return len(predicted)

    from libtbx import easy_mp

    jobs = [(random_rotation(),) for _ in range(params.n_samples)]
    counts = easy_mp.parallel_map(
        func=predict_once,
        iterable=jobs,
        processes=params.nproc,
        preserve_order=True,
        preserve_exception_message=True,
    )
    n_predicted = flex.double(counts)

    print("Basic statistics:")
    from scitbx.math import basic_statistics

    basic_statistics(n_predicted).show()

    print("Histogram:")
    hist = flex.histogram(n_predicted, n_slots=20)
    hist.show()

    print("Raw spot counts:")
    print(list(n_predicted))

    if not params.plot:
        return

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc("font", family="serif")
    pyplot.rc("font", serif="Times New Roman")

    bar_colour = "#2166AC"
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        hist.slot_centers(),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        color=bar_colour,
        edgecolor=bar_colour,
    )
    ax.set_xlabel("Spot count")
    ax.set_ylabel("Frequency")
    pdf = PdfPages("predicted_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()
def plot_uc_histograms(uc_params, outliers, steps_per_angstrom=20, plot_name='uc_histograms.png'):
    """Save 2D and 1D histograms of the a, b and c unit cell lengths.

    The top row shows 100x100 2D histograms of (a, b), (b, c) and (c, a)
    with empty bins masked. The bottom row shows 1D histograms of a, b and
    c with ``steps_per_angstrom`` slots per unit, drawn in black for all
    values and overlaid in red for those not selected by ``outliers``.

    :param uc_params: sequence whose first three items are the a, b and c
        arrays; they must support ``.select()``.
    :param outliers: boolean selection whose inverse picks the inliers.
    :param steps_per_angstrom: number of histogram slots per unit length.
    :param plot_name: file name passed to ``savefig``.
    """
    import math

    import numpy as np
    from matplotlib import pyplot as plt

    plt.style.use('ggplot')
    f, ax = plt.subplots(nrows=2, ncols=3, figsize=(12, 8))
    a, b, c = uc_params[:3]

    def uc_param_hist2d(p1, p2, ax):
        nbins = 100
        H, xedges, yedges = np.histogram2d(p1, p2, bins=nbins)
        # Rotate and flip so that p1 runs along x and p2 along y.
        H = np.rot90(H)
        H = np.flipud(H)
        Hmasked = np.ma.masked_where(H == 0, H)
        ax.pcolormesh(xedges, yedges, Hmasked)

    uc_param_hist2d(a, b, ax[0][0])
    uc_param_hist2d(b, c, ax[0][1])
    uc_param_hist2d(c, a, ax[0][2])

    for i in range(3):
        mmm = flex.min_max_mean_double(uc_params[i])
        # Snap the range outwards to the step grid. Rounding the upper bound
        # up (not down) keeps the largest values inside the histogram range.
        Amin = math.floor(mmm.min * steps_per_angstrom) / steps_per_angstrom
        Amax = math.ceil(mmm.max * steps_per_angstrom) / steps_per_angstrom
        if Amax <= Amin:
            # All values sit on a single grid point; widen to one step.
            Amax = Amin + 1.0 / steps_per_angstrom
        n_slots = max(1, int((Amax - Amin) * steps_per_angstrom))
        hist = flex.histogram(uc_params[i], Amin, Amax, n_slots=n_slots)
        hist_inliers = flex.histogram(uc_params[i].select(~outliers), Amin, Amax, n_slots=n_slots)
        ax[1][i].bar(hist.slot_centers(), hist.slots(), align='center',
                     width=hist.slot_width(), zorder=10,
                     color='black', edgecolor=None, linewidth=0)
        ax[1][i].bar(hist_inliers.slot_centers(), hist_inliers.slots(), align='center',
                     width=hist_inliers.slot_width(), zorder=10,
                     color='red', edgecolor=None, linewidth=0)

    # Raw strings: "\A" is not a valid escape sequence in a normal literal.
    ax[0][0].set_ylabel(r'b ($\AA$)')
    ax[0][1].set_ylabel(r'c ($\AA$)')
    ax[0][2].set_ylabel(r'a ($\AA$)')
    ax[1][0].set_xlabel(r'a ($\AA$)')
    ax[1][1].set_xlabel(r'b ($\AA$)')
    ax[1][2].set_xlabel(r'c ($\AA$)')

    # The layout must be adjusted before saving to affect the written file.
    plt.tight_layout()
    f.savefig(plot_name)
    plt.close(f)
def normal_probability_plot(data, label=None):
    """Plot the distribution of normal probabilities of errors.

    :param data: mapping whose "delta_hl" entry is a numpy array of
        normalised deviations.
    :param label: optional error model label; when given it is appended to
        the dictionary keys and to the plot titles.
    :returns: a dict with two plot entries: a heatmap of the sorted
        deviations against normal order statistic medians, and a 100-slot
        histogram of the deviations with a reference normal curve.
    """
    n = data["delta_hl"].size
    y = np.sort(data["delta_hl"])
    delta = 0.5 / n
    v = np.linspace(start=delta, stop=1.0 - delta, endpoint=True, num=n)
    x = norm.ppf(v)

    H, xedges, yedges = np.histogram2d(x, y, bins=(200, 200))
    nonzeros = np.nonzero(H)
    # Empty bins stay NaN so they can be told apart from populated ones.
    # np.nan is used because the np.NAN alias was removed in NumPy 2.0.
    z = np.empty(H.shape)
    z[:] = np.nan
    z[nonzeros] = H[nonzeros]

    # also make a histogram
    histy = flex.histogram(flumpy.from_numpy(y), n_slots=100)
    # make a gaussian for reference also
    n = y.size
    width = histy.slot_centers()[1] - histy.slot_centers()[0]
    gaussian = [
        n * width * math.exp(-(sc ** 2) / 2.0) / ((2.0 * math.pi) ** 0.5)
        for sc in histy.slot_centers()
    ]

    key = (
        f"normal_distribution_plot_{label}"
        if label is not None
        else "normal_distribution_plot"
    )
    title = "Normal probability plot with error model applied"
    title = title + f" (error model {label})" if label is not None else title

    key_hist = f"nor_dev_hist_{label}" if label is not None else "nor_dev_hist"
    title_hist = "Normal deviations with error model applied"
    title_hist = (
        title_hist + f" (error model {label})" if label is not None else title_hist
    )

    return {
        key: {
            "data": [
                {
                    "x": xedges.tolist(),
                    "y": yedges.tolist(),
                    "z": z.transpose().tolist(),
                    "type": "heatmap",
                    "name": "normalised deviations",
                    "colorbar": {
                        "title": "Number of reflections",
                        "titleside": "right",
                    },
                    "colorscale": "Jet",
                },
                {
                    "x": [-5, 5],
                    "y": [-5, 5],
                    "type": "scatter",
                    "mode": "lines",
                    "name": "z = m",
                    "color": "rgb(0,0,0)",
                },
            ],
            "layout": {
                "title": title,
                "xaxis": {"anchor": "y", "title": "Order statistic medians, m"},
                "yaxis": {"anchor": "x", "title": "Ordered responses, z"},
            },
            "help": """\
This plot shows the normalised devations (of each reflection from the
group-weighted mean), sorted in order and plotted against the expected order
based on a normal distribution model. A true normal distribution of deviations
would give the straight line indicated.
If the errors are well described by this model, the ordered responses should
closely fit the straight line to high absolute values of x (>3), where there is
typically a deviation away from the line due to wide tails of the distribution.
""",
        },
        key_hist: {
            "data": [
                {
                    "x": list(histy.slot_centers()),
                    "y": list(histy.slots()),
                    "type": "bar",
                    "name": "dataset normalised deviations",
                },
                {
                    "x": list(histy.slot_centers()),
                    "y": gaussian,
                    "type": "scatter",
                    "name": "Ideal normal distribution",
                },
            ],
            "layout": {
                "title": title_hist,
                "xaxis": {"anchor": "y", "title": "Normalised deviation"},
                "yaxis": {"anchor": "x", "title": "Number of reflections"},
            },
            "help": """\
This plot shows the distribution of normalised devations (of each reflection
from the group-weighted mean), for the reflections used to minimise the error
model. A true normal distribution is indicated.
""",
        },
    }
def plot_absorption_plots(physical_model, reflection_table=None):
    """Make a number of plots to help with the interpretation of the
    absorption correction.

    Args:
        physical_model: Scaling model exposing an "absorption" entry in
            ``components``; that component provides ``parameters``, ``data``
            and ``calculate_scales()``.
        reflection_table: Optional table with "s1c" and "s0c" columns. When
            it is falsy, only the two correction-surface plots are returned.

    Returns:
        dict: Plot definitions keyed "absorption_surface" and
        "undiffracted_absorption_surface", plus "vector_directions" and
        "absorption_corrections" when a reflection table is supplied.
    """
    absorption = physical_model.components["absorption"]

    def _angular_layout(title, with_range=False):
        # Shared axis layout for the (azimuth, polar) heatmaps.
        xaxis = {
            "domain": [0, 1],
            "anchor": "y",
            "title": "azimuthal angle (degrees)",
        }
        yaxis = {
            "domain": [0, 1],
            "anchor": "x",
            "title": "polar angle (degrees)",
        }
        if with_range:
            xaxis["range"] = [0, 360]
            yaxis["range"] = [0, 180]
        return {"title": title, "xaxis": xaxis, "yaxis": yaxis}

    # First plot the absorption surface
    d = {
        "absorption_surface": {
            "data": [],
            "layout": _angular_layout("Absorption correction surface"),
            "help": absorption_help_msg,
        }
    }

    params = absorption.parameters

    # The harmonic order is recovered from the parameter count; it is both
    # the generator order and the largest l summed over below.
    order = int(-1.0 + ((1.0 + len(params)) ** 0.5))
    lmax = order
    lfg = scitbxmath.log_factorial_generator(2 * order + 1)
    STEPS = 50
    azimuth_ = np.linspace(0, 2 * np.pi, 2 * STEPS)
    polar_ = np.linspace(0, np.pi, STEPS)
    THETA, _ = np.meshgrid(azimuth_, polar_, indexing="ij")
    Intensity = np.ones(THETA.shape)
    undiffracted_intensity = np.ones(THETA.shape)
    counter = 0
    sqrt2 = math.sqrt(2)
    nsssphe = scitbxmath.nss_spherical_harmonics(order, 50000, lfg)
    for l in range(1, lmax + 1):
        for m in range(-l, l + 1):
            for it, t in enumerate(polar_):
                for ip, p in enumerate(azimuth_):
                    Ylm = nsssphe.spherical_harmonic(l, abs(m), t, p)
                    if m < 0:
                        r = sqrt2 * ((-1) ** m) * Ylm.imag
                    elif m == 0:
                        assert Ylm.imag == 0.0
                        r = Ylm.real
                    else:
                        r = sqrt2 * ((-1) ** m) * Ylm.real
                    Intensity[ip, it] += params[counter] * r
                    # for the undiffracted intensity, we want to add the
                    # correction at each point to the parity conjugate. We can
                    # use the fact that the odd l terms are parity odd, and
                    # even are even, to just calculate the even terms as
                    # follows
                    if l % 2 == 0:
                        undiffracted_intensity[ip, it] += params[counter] * r
            # one parameter per (l, m) pair
            counter += 1

    d["absorption_surface"]["data"].append(
        {
            "x": list(azimuth_ * 180.0 / np.pi),
            "y": list(polar_ * 180.0 / np.pi),
            "z": list(Intensity.T.tolist()),
            "type": "heatmap",
            "colorscale": "Viridis",
            "colorbar": {"title": "inverse <br>scale factor"},
            "name": "absorption surface",
            "xaxis": "x",
            "yaxis": "y",
        }
    )

    d["undiffracted_absorption_surface"] = {
        "data": [],
        "layout": _angular_layout("Undiffracted absorption correction"),
        "help": """
This plot shows the calculated relative absorption for a paths travelling
straight through the crystal at a given direction in a crystal-fixed frame of
reference (in spherical coordinates). This gives an indication of the effective
shape of the crystal for absorbing x-rays. In this plot, the pole (polar angle 0)
corresponds to the laboratory x-axis.
""",
    }

    d["undiffracted_absorption_surface"]["data"].append(
        {
            "x": list(azimuth_ * 180.0 / np.pi),
            "y": list(polar_ * 180.0 / np.pi),
            "z": list(undiffracted_intensity.T.tolist()),
            "type": "heatmap",
            "colorscale": "Viridis",
            "colorbar": {"title": "inverse <br>scale factor"},
            "name": "Undiffracted absorption correction",
            "xaxis": "x",
            "yaxis": "y",
        }
    )

    if not reflection_table:
        return d

    # now plot the directions of the scattering vectors
    directions_layout = _angular_layout(
        "Scattering vectors in crystal frame", with_range=True
    )
    directions_layout["coloraxis"] = {
        "showscale": False,
    }
    d["vector_directions"] = {
        "data": [],
        "layout": directions_layout,
        "help": """
This plot shows the scattering vector directions in the crystal reference frame
used to determine the absorption correction. The s0 vectors are plotted in yellow,
the s1 vectors are plotted in teal. This gives an indication of which parts of
the absorption correction surface are sampled when determining the absorption
correction. In this plot, the pole (polar angle 0) corresponds to the laboratory
x-axis.""",
    }

    STEPS = 180  # do one point per degree
    azimuth_ = np.linspace(0, 2 * np.pi, 2 * STEPS)
    polar_ = np.linspace(0, np.pi, STEPS)
    THETA, _ = np.meshgrid(azimuth_, polar_, indexing="ij")

    def _direction_trace(lookup_key, vector_key, value):
        # Build a heatmap trace in which every sampled direction is set to
        # `value` and every other cell is NaN (np.nan, since the np.NAN
        # alias was removed in NumPy 2.0).
        grid = np.full(THETA.shape, np.nan)
        # note, the s1_lookup, s0_lookup is only calculated for large
        # datasets, so for small datasets we need to calculate again.
        if lookup_key not in absorption.data:
            lookup = calc_lookup_index(
                calc_theta_phi(reflection_table[vector_key]), points_per_degree=1
            )
            idx_polar, idx_azimuth = np.divmod(np.unique(lookup), 360)
        else:
            lookup = np.unique(absorption.data[lookup_key])
            # x is phi, y is theta
            idx_polar, idx_azimuth = np.divmod(lookup, 720)
            # convert from two points per degree to one
            idx_polar = idx_polar // 2
            idx_azimuth = idx_azimuth // 2
        grid[idx_azimuth, idx_polar] = value
        return {
            "x": list(azimuth_ * 180.0 / np.pi),
            "y": list(polar_ * 180.0 / np.pi),
            "z": list(grid.T.tolist()),
            "type": "heatmap",
            "colorscale": "Viridis",
            "showscale": False,
            "xaxis": "x",
            "yaxis": "y",
            "zmin": 0,
            "zmax": 2,
        }

    d["vector_directions"]["data"].append(_direction_trace("s1_lookup", "s1c", 1))
    d["vector_directions"]["data"].append(_direction_trace("s0_lookup", "s0c", 2))

    scales = absorption.calculate_scales()
    hist = flex.histogram(scales, n_slots=min(100, int(scales.size() * 10)))
    d["absorption_corrections"] = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(hist.slots()),
                "type": "bar",
                "name": "Applied absorption corrections",
            },
        ],
        "layout": {
            "title": "Applied absorption corrections",
            "xaxis": {"anchor": "y", "title": "Inverse scale factor"},
            "yaxis": {"anchor": "x", "title": "Number of reflections"},
        },
    }
    return d
def run(args):
    """Run ``run_once`` over all arguments in parallel and summarise results.

    Writes two text tables (``results.txt`` and ``results_indexed.txt``) and
    PDF histograms of the strong-spot counts, the number of indexed and
    integrated lattices, and the six unit cell parameters, all into the
    current working directory.

    Args:
        args: Iterable of work items, each passed to ``run_once``. Items for
            which ``run_once`` returns None are ignored.
    """
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()
    sweep_dir_cryst = flex.std_string()

    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for result in results:
        if result is None:
            continue
        # per-sweep quantities
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(result.d_strong_spots_99th_percentile)
        # per-lattice quantities (a sweep may contribute several entries)
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    def _write_table(path, rows):
        # The table is text, so open in text mode and write it explicitly;
        # this works on Python 2 and 3, unlike ``print >> f`` into a file
        # opened 'wb'.
        with open(path, 'w') as f:
            f.write("%s\n" % table_utils.format(
                rows, has_header=True, justify='right'))

    table_data = [('sweep_dir', 'template', '#strong_spots', '#unindexed_spots',
                   '#lattices', 'd_spacing_50th_percentile',
                   'd_spacing_95th_percentile', 'd_spacing_99th_percentile',)]
    for i in range(len(sweep_directories)):
        table_data.append((sweep_directories[i],
                           templates[i],
                           str(n_strong_spots[i]),
                           str(n_unindexed_spots[i]),
                           str(n_indexed_lattices[i]),
                           str(d_strong_spots_50th_percentile[i]),
                           str(d_strong_spots_95th_percentile[i]),
                           str(d_strong_spots_99th_percentile[i]),
                           ))
    _write_table('results.txt', table_data)

    table_data = [('sweep_dir', 'cell_a', 'cell_b', 'cell_c', 'alpha', 'beta',
                   'gamma', '#indexed_reflections', 'd_min_indexed',
                   'rmsd_x', 'rmsd_y', 'rmsd_phi')]
    for i in range(len(cell_params)):
        table_data.append((sweep_dir_cryst[i],
                           str(cell_params[i][0]),
                           str(cell_params[i][1]),
                           str(cell_params[i][2]),
                           str(cell_params[i][3]),
                           str(cell_params[i][4]),
                           str(cell_params[i][5]),
                           str(n_indexed[i]),
                           str(d_min_indexed[i]),
                           str(rmsds[i][0]),
                           str(rmsds[i][1]),
                           str(rmsds[i][2]),
                           ))
    _write_table('results_indexed.txt', table_data)

    cell_a = flex.double([params[0] for params in cell_params])
    cell_b = flex.double([params[1] for params in cell_params])
    cell_c = flex.double([params[2] for params in cell_params])
    cell_alpha = flex.double([params[3] for params in cell_params])
    cell_beta = flex.double([params[4] for params in cell_params])
    cell_gamma = flex.double([params[5] for params in cell_params])

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    blue = '#2166AC'

    def _save_pdf(fig, path):
        # Write a single figure to its own PDF file.
        with PdfPages(path) as pdf:
            pdf.savefig(fig)

    def _lattice_count_histogram(counts, xlabel, path):
        # One slot per lattice count, drawn as bars centred on integers.
        hist = flex.histogram(counts.as_double(), n_slots=flex.max(counts))
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(range(int(hist.data_max())), hist.slots(),
               width=0.75 * hist.slot_width(), align='center',
               color=blue, edgecolor=blue)
        ax.set_xlim(-0.5, hist.data_max() - 0.5)
        ax.set_xticks(range(0, int(hist.data_max())))
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')
        _save_pdf(fig, path)

    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75 * hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    _save_pdf(fig, "spot_count_histogram.pdf")

    _lattice_count_histogram(
        n_indexed_lattices, 'Number of indexed lattices',
        "n_indexed_lattices_histogram.pdf")

    if flex.max(n_integrated_lattices) > 0:
        _lattice_count_histogram(
            n_integrated_lattices, 'Number of integrated lattices',
            "n_integrated_lattices_histogram.pdf")

    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    for i, cell_param in enumerate(
            (cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma)):
        ax = axes.flat[i]
        flex.min_max_mean_double(cell_param).show()
        # single-argument parenthesised print is valid on Python 2 and 3
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(hist.slot_centers(), hist.slots(),
               width=0.75 * hist.slot_width(),
               color=blue, edgecolor=blue)
        ax.set_xlabel('Cell parameter')
        ax.set_ylabel('Frequency')
    pyplot.tight_layout()
    _save_pdf(fig, "cell_parameters.pdf")
def normal_probability_plot(data):
    """Plot the distribution of normal probabilities of errors.

    Args:
        data: Mapping whose "delta_hl" entry holds the normalised deviations;
            it must support ``len()`` and conversion to ``flex.double``.

    Returns:
        dict: Two plot definitions, "normal_distribution_plot" (heatmap of
        sorted deviations against expected normal quantiles) and
        "nor_dev_hist" (100-slot histogram with a reference Gaussian).
    """
    norm = distributions.normal_distribution()

    n = len(data["delta_hl"])
    # Plotting-position constant. Written with float literals so that it is
    # 0.375 even under Python 2 integer division, where `3 / 8` would be 0.
    a = 3.0 / 8.0 if n <= 10 else 0.5

    y = flex.sorted(flex.double(data["delta_hl"]))
    x = [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)]

    H, xedges, yedges = np.histogram2d(
        np.array(x), y.as_numpy_array(), bins=(200, 200)
    )
    # Leave empty bins as NaN so that only occupied bins carry a count.
    # np.nan is used because the np.NAN alias was removed in NumPy 2.0.
    nonzeros = np.nonzero(H)
    z = np.full(H.shape, np.nan)
    z[nonzeros] = H[nonzeros]

    # also make a histogram
    histy = flex.histogram(y, n_slots=100)
    centers = histy.slot_centers()

    # make a gaussian for reference also: a unit normal density scaled by
    # n * slot width so that it is comparable to the slot counts.
    n = y.size()
    width = centers[1] - centers[0]
    gaussian = [
        n * width * math.exp(-(sc ** 2) / 2.0) / ((2.0 * math.pi) ** 0.5)
        for sc in centers
    ]

    return {
        "normal_distribution_plot": {
            "data": [
                {
                    "x": xedges.tolist(),
                    "y": yedges.tolist(),
                    "z": z.transpose().tolist(),
                    "type": "heatmap",
                    "name": "normalised deviations",
                    "colorbar": {
                        "title": "Number of reflections",
                        "titleside": "right",
                    },
                    "colorscale": "Jet",
                },
                {
                    "x": [-5, 5],
                    "y": [-5, 5],
                    "type": "scatter",
                    "mode": "lines",
                    "name": "z = m",
                    "color": "rgb(0,0,0)",
                },
            ],
            "layout": {
                "title": "Normal probability plot with error model applied",
                "xaxis": {"anchor": "y", "title": "Order statistic medians, m"},
                "yaxis": {"anchor": "x", "title": "Ordered responses, z"},
            },
            "help": """\
This plot shows the normalised devations (of each reflection from the
group-weighted mean), sorted in order and plotted against the expected order
based on a normal distribution model. A true normal distribution of deviations
would give the straight line indicated. If the errors are well described by
this model, the ordered responses should closely fit the straight line to high
absolute values of x (>3), where there is typically a deviation away from the
line due to wide tails of the distribution.
""",
        },
        "nor_dev_hist": {
            "data": [
                {
                    "x": list(centers),
                    "y": list(histy.slots()),
                    "type": "bar",
                    "name": "dataset normalised deviations",
                },
                {
                    "x": list(centers),
                    "y": gaussian,
                    "type": "scatter",
                    "name": "Ideal normal distribution",
                },
            ],
            "layout": {
                "title": "Normal deviations with error model applied",
                "xaxis": {"anchor": "y", "title": "Normalised deviation"},
                "yaxis": {"anchor": "x", "title": "Number of reflections"},
            },
            "help": """\
This plot shows the distribution of normalised devations (of each reflection
from the group-weighted mean), for the reflections used to minimise the error
model. A true normal distribution is indicated.
""",
        },
    }
from __future__ import print_function from dials.array_family import flex import cPickle as pickle import sys refl_in = sys.argv[1] refl_out = sys.argv[2] part_lim = float(sys.argv[3]) refl = pickle.load(open(refl_in, "r")) # remove duff reflections sel = refl.get_flags(refl.flags.integrated_sum, all=True) refl = refl.select(sel) sel = refl.get_flags(refl.flags.integrated, all=True) refl = refl.select(sel) # extract partiality, plot histogram, select subset, dump part = refl["partiality"] parth = flex.histogram(part, n_slots=20, data_min=0.0, data_max=1.0) parth.show() edge = part < part_lim edge_refl = refl.select(edge) pickle.dump(edge_refl, open(refl_out, "w")) print("Wrote %d reflections with partiality < %.3f to %s" % (len(edge_refl), part_lim, refl_out))
def run(args):
    """Process every item of ``args`` with ``run_once`` and report the results.

    The per-sweep and per-lattice results are collected, written as text
    tables to ``results.txt`` and ``results_indexed.txt``, and plotted as
    histograms into PDF files in the current working directory.

    Args:
        args: Iterable of work items handed one by one to ``run_once``. A
            ``None`` result from ``run_once`` is skipped.
    """
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()
    sweep_dir_cryst = flex.std_string()

    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for result in results:
        if result is None:
            continue
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(
            result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(
            result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(
            result.d_strong_spots_99th_percentile)
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    table_data = [(
        "sweep_dir",
        "template",
        "#strong_spots",
        "#unindexed_spots",
        "#lattices",
        "d_spacing_50th_percentile",
        "d_spacing_95th_percentile",
        "d_spacing_99th_percentile",
    )]
    for i in range(len(sweep_directories)):
        table_data.append((
            sweep_directories[i],
            templates[i],
            str(n_strong_spots[i]),
            str(n_unindexed_spots[i]),
            str(n_indexed_lattices[i]),
            str(d_strong_spots_50th_percentile[i]),
            str(d_strong_spots_95th_percentile[i]),
            str(d_strong_spots_99th_percentile[i]),
        ))

    # Text mode: printing a str into a file opened "wb" raises TypeError on
    # Python 3.
    with open("results.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"),
              file=f)

    table_data = [(
        "sweep_dir",
        "cell_a",
        "cell_b",
        "cell_c",
        "alpha",
        "beta",
        "gamma",
        "#indexed_reflections",
        "d_min_indexed",
        "rmsd_x",
        "rmsd_y",
        "rmsd_phi",
    )]
    for i in range(len(cell_params)):
        table_data.append((
            sweep_dir_cryst[i],
            str(cell_params[i][0]),
            str(cell_params[i][1]),
            str(cell_params[i][2]),
            str(cell_params[i][3]),
            str(cell_params[i][4]),
            str(cell_params[i][5]),
            str(n_indexed[i]),
            str(d_min_indexed[i]),
            str(rmsds[i][0]),
            str(rmsds[i][1]),
            str(rmsds[i][2]),
        ))

    with open("results_indexed.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"),
              file=f)

    cell_a = flex.double([params[0] for params in cell_params])
    cell_b = flex.double([params[1] for params in cell_params])
    cell_c = flex.double([params[2] for params in cell_params])
    cell_alpha = flex.double([params[3] for params in cell_params])
    cell_beta = flex.double([params[4] for params in cell_params])
    cell_gamma = flex.double([params[5] for params in cell_params])

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc("font", family="serif")
    pyplot.rc("font", serif="Times New Roman")

    blue = "#2166AC"

    # Strong-spot counts (the d_min 4 subset), 20 slots.
    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        hist.slot_centers(),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlabel("Spot count")
    ax.set_ylabel("Frequency")
    pdf = PdfPages("spot_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()

    # Indexed lattices per sweep: one slot per count, bars on integers.
    hist = flex.histogram(n_indexed_lattices.as_double(),
                          n_slots=flex.max(n_indexed_lattices))
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        range(int(hist.data_max())),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        align="center",
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlim(-0.5, hist.data_max() - 0.5)
    ax.set_xticks(range(0, int(hist.data_max())))
    ax.set_xlabel("Number of indexed lattices")
    ax.set_ylabel("Frequency")
    pdf = PdfPages("n_indexed_lattices_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()

    # Integrated lattices: only plotted when at least one was integrated.
    if flex.max(n_integrated_lattices) > 0:
        hist = flex.histogram(n_integrated_lattices.as_double(),
                              n_slots=flex.max(n_integrated_lattices))
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(
            range(int(hist.data_max())),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            align="center",
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlim(-0.5, hist.data_max() - 0.5)
        ax.set_xticks(range(0, int(hist.data_max())))
        ax.set_xlabel("Number of integrated lattices")
        ax.set_ylabel("Frequency")
        pdf = PdfPages("n_integrated_lattices_histogram.pdf")
        pdf.savefig(fig)
        pdf.close()

    # Six unit cell parameters on a 2 x 3 grid of axes.
    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    for i, cell_param in enumerate(
            (cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma)):
        ax = axes.flat[i]
        flex.min_max_mean_double(cell_param).show()
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel("Cell parameter")
        ax.set_ylabel("Frequency")
    pyplot.tight_layout()
    pdf = PdfPages("cell_parameters.pdf")
    pdf.savefig(fig)
    pdf.close()
def get_histogram(d_star_sq, target_n_per_bin=20, max_slots=20, min_slots=5):
    """Histogram ``d_star_sq`` with a slot count scaled to the data size.

    The slot count is ``len(d_star_sq) // target_n_per_bin``, capped at
    ``max_slots`` and then raised to at least ``min_slots``.

    Returns:
        The ``flex.histogram`` of ``d_star_sq`` with that many slots.
    """
    wanted = len(d_star_sq) // target_n_per_bin
    # The cap is applied before the floor, so min_slots takes precedence if
    # the two limits conflict.
    slot_count = max(min(wanted, max_slots), min_slots)
    return flex.histogram(d_star_sq, n_slots=slot_count)
def run_with_preparsed(experiments, reflections, params):
    """Remove experiments whose positional RMSD is an upper outlier.

    For each experiment the RMSD of the calculated-minus-observed positions
    ('xyzcal.mm' - 'xyzobs.mm.value') is computed. Experiments with no
    reflections, or with an RMSD above ``q3 + iqr_multiplier * IQR``, are
    dropped; low outliers are kept.

    Args:
        experiments: Experiment list to filter.
        reflections: Reflection table whose 'id' column indexes into
            ``experiments``.
        params: Options object. The attributes read here are ``detector``,
            ``max_delta``, ``show_plots``, ``iqr_multiplier`` and
            ``delta_psi_filter``.

    Returns:
        tuple: ``(filtered_experiments, filtered_reflections)`` with the
        reflection 'id' column renumbered to match the filtered list.
    """
    from dxtbx.model import ExperimentList
    from scitbx.math import five_number_summary

    print("Found", len(reflections), "reflections", "and", len(experiments),
          "experiments")

    filtered_reflections = flex.reflection_table()
    filtered_experiments = ExperimentList()

    skipped_reflections = flex.reflection_table()
    skipped_experiments = ExperimentList()

    if params.detector is not None:
        # Restrict the RMSD filter to experiments on the chosen detector;
        # the others are set aside and reconsidered at the end.
        culled_reflections = flex.reflection_table()
        culled_experiments = ExperimentList()
        detector = experiments.detectors()[params.detector]
        for expt_id, experiment in enumerate(experiments):
            refls = reflections.select(reflections['id'] == expt_id)
            if experiment.detector is detector:
                culled_experiments.append(experiment)
                refls['id'] = flex.int(len(refls), len(culled_experiments) - 1)
                culled_reflections.extend(refls)
            else:
                skipped_experiments.append(experiment)
                refls['id'] = flex.int(len(refls), len(skipped_experiments) - 1)
                skipped_reflections.extend(refls)

        print("RMSD filtering %d experiments using detector %d, out of %d" %
              (len(culled_experiments), params.detector, len(experiments)))
        reflections = culled_reflections
        experiments = culled_experiments

    difference_vector_norms = (reflections['xyzcal.mm'] -
                               reflections['xyzobs.mm.value']).norms()

    if params.max_delta is not None:
        sel = difference_vector_norms <= params.max_delta
        reflections = reflections.select(sel)
        difference_vector_norms = difference_vector_norms.select(sel)

    # Per-experiment RMSD (0 where there are no reflections) and counts.
    data = flex.double()
    counts = flex.double()
    for i in range(len(experiments)):
        dvns = difference_vector_norms.select(reflections['id'] == i)
        counts.append(len(dvns))
        if len(dvns) == 0:
            data.append(0)
            continue
        rmsd = math.sqrt(flex.sum_sq(dvns) / len(dvns))
        data.append(rmsd)
    data *= 1000  # mm -> microns
    subset = data.select(counts > 0)
    print(len(subset), "experiments with > 0 reflections")

    if params.show_plots:
        h = flex.histogram(subset, n_slots=40)
        fig = plt.figure()
        # Integer position spec: current matplotlib rejects the string '111'.
        ax = fig.add_subplot(111)
        ax.plot(h.slot_centers().as_numpy_array(),
                h.slots().as_numpy_array(), '-')
        plt.title("Histogram of %d image RMSDs" % len(subset))

        fig = plt.figure()
        plt.boxplot(subset, vert=False)
        plt.title("Boxplot of %d image RMSDs" % len(subset))
        plt.show()

    outliers = counts == 0
    min_x, q1_x, med_x, q3_x, max_x = five_number_summary(subset)
    print(
        "Five number summary of RMSDs (microns): min %.1f, q1 %.1f, med %.1f, q3 %.1f, max %.1f"
        % (min_x, q1_x, med_x, q3_x, max_x))
    iqr_x = q3_x - q1_x
    cut_x = params.iqr_multiplier * iqr_x
    outliers.set_selected(data > q3_x + cut_x, True)
    # Don't throw away the images that are outliers in the 'good' direction!

    for i in range(len(experiments)):
        if outliers[i]:
            continue
        refls = reflections.select(reflections['id'] == i)
        refls['id'] = flex.int(len(refls), len(filtered_experiments))
        filtered_reflections.extend(refls)
        filtered_experiments.append(experiments[i])

    zeroes = counts == 0
    n_zero = len(counts.select(zeroes))
    n_removed = len(experiments) - len(filtered_experiments)
    # 100.0 forces true division, so the percentage is not truncated when
    # the module runs under Python 2 integer division.
    print(
        "Removed %d bad experiments and %d experiments with zero reflections, out of %d (%%%.1f)"
        % (n_removed - n_zero, n_zero, len(experiments),
           100.0 * n_removed / len(experiments)))

    if params.detector is not None:
        # Bring back set-aside experiments whose crystal survived the filter.
        crystals = filtered_experiments.crystals()
        for expt_id, experiment in enumerate(skipped_experiments):
            if experiment.crystal in crystals:
                filtered_experiments.append(experiment)
                refls = skipped_reflections.select(
                    skipped_reflections['id'] == expt_id)
                refls['id'] = flex.int(len(refls), len(filtered_experiments) - 1)
                filtered_reflections.extend(refls)

    if params.delta_psi_filter is not None:
        delta_psi = filtered_reflections['delpsical.rad'] * 180 / math.pi
        sel = (delta_psi <= params.delta_psi_filter) & (
            delta_psi >= -params.delta_psi_filter)
        n_before = len(filtered_reflections)
        filtered_reflections = filtered_reflections.select(sel)
        print("Filtering by delta psi, removing %d out of %d reflections" %
              (n_before - len(filtered_reflections), n_before))

    print("Final experiment count", len(filtered_experiments))
    return filtered_experiments, filtered_reflections
def run(self):
    """Parse the options, filter experiments by RMSD and write the results.

    For each experiment the RMSD of 'xyzcal.mm' - 'xyzobs.mm.value' is
    computed. Experiments with no reflections, or with an RMSD above
    ``q3 + iqr_multiplier * IQR``, are dropped. The surviving experiments
    and reflections are written to ``params.output.filtered_experiments``
    (JSON) and ``params.output.filtered_reflections`` (pickle).
    """
    from dials.util.options import flatten_experiments, flatten_reflections
    from dxtbx.model.experiment.experiment_list import ExperimentList
    from scitbx.math import five_number_summary

    # Parse the command line arguments
    params, options = self.parser.parse_args(show_diff_phil=True)
    self.params = params
    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    assert len(reflections) == 1
    reflections = reflections[0]
    # Single formatted strings print the same text on Python 2 and 3.
    print("Found %d reflections and %d experiments" %
          (len(reflections), len(experiments)))

    difference_vector_norms = (reflections['xyzcal.mm'] -
                               reflections['xyzobs.mm.value']).norms()

    # Per-experiment RMSD (0 where there are no reflections) and counts.
    data = flex.double()
    counts = flex.double()
    for i in range(len(experiments)):
        dvns = difference_vector_norms.select(reflections['id'] == i)
        counts.append(len(dvns))
        if len(dvns) == 0:
            data.append(0)
            continue
        rmsd = math.sqrt(flex.sum_sq(dvns) / len(dvns))
        data.append(rmsd)
    data *= 1000  # mm -> microns
    subset = data.select(counts > 0)
    print("%d experiments with > 0 reflections" % len(subset))

    if params.show_plots:
        h = flex.histogram(subset, n_slots=40)
        fig = plt.figure()
        # Integer position spec: current matplotlib rejects the string '111'.
        ax = fig.add_subplot(111)
        ax.plot(h.slot_centers().as_numpy_array(),
                h.slots().as_numpy_array(), '-')
        plt.title("Histogram of %d image RMSDs" % len(subset))

        fig = plt.figure()
        plt.boxplot(subset, vert=False)
        plt.title("Boxplot of %d image RMSDs" % len(subset))
        plt.show()

    outliers = counts == 0
    min_x, q1_x, med_x, q3_x, max_x = five_number_summary(subset)
    print("Five number summary of RMSDs (microns): min %.1f, q1 %.1f, med %.1f, q3 %.1f, max %.1f" % (
        min_x, q1_x, med_x, q3_x, max_x))
    iqr_x = q3_x - q1_x
    cut_x = params.iqr_multiplier * iqr_x
    outliers.set_selected(data > q3_x + cut_x, True)
    # Don't throw away the images that are outliers in the 'good' direction!

    filtered_reflections = flex.reflection_table()
    filtered_experiments = ExperimentList()
    for i in range(len(experiments)):
        if outliers[i]:
            continue
        refls = reflections.select(reflections['id'] == i)
        # renumber so ids index into the filtered experiment list
        refls['id'] = flex.int(len(refls), len(filtered_experiments))
        filtered_reflections.extend(refls)
        filtered_experiments.append(experiments[i])

    zeroes = counts == 0
    n_zero = len(counts.select(zeroes))
    n_removed = len(experiments) - len(filtered_experiments)
    # 100.0 forces true division; with Python 2 integer division the
    # original expression truncated the ratio before scaling it.
    print("Removed %d bad experiments and %d experiments with zero reflections, out of %d (%%%.1f)" % (
        n_removed - n_zero, n_zero, len(experiments),
        100.0 * n_removed / len(experiments)))

    from dxtbx.model.experiment.experiment_list import ExperimentListDumper
    dump = ExperimentListDumper(filtered_experiments)
    dump.as_json(params.output.filtered_experiments)

    filtered_reflections.as_pickle(params.output.filtered_reflections)