def scale(self, other):
    """Determine a least-squares scale factor mapping *other* onto
    self.f_model_real over their common Miller indices.

    Parameters
    ----------
    other : miller array with .indices() and .data(), matched against
        self.f_model_real.

    Returns
    -------
    slope : the fitted scale factor (offset/corr/N are printed only).
    """
    from cctbx import miller
    # Pair up reflections present in both arrays by Miller index.
    matches = miller.match_indices(self.f_model_real.indices(), other.indices())
    sel0 = flex.size_t([p[0] for p in matches.pairs()])
    sel1 = flex.size_t([p[1] for p in matches.pairs()])
    val0 = self.f_model_real.data().select(sel0)
    val1 = other.data().select(sel1)
    plot = False  # developer switch: log-log scatter before/after scaling
    if plot:
        from matplotlib import pyplot as plt
        plt.plot([-1, 4], [-1, 4], "g-")
        plt.plot(flex.log10(val0), flex.log10(val1), "r.")
        plt.show()
    from xfel.cxi.cxi_cc import correlation
    slope, offset, corr, N = correlation(
        self=self.f_model_real.select(sel0),
        other=other.select(sel1))
    # Single %-formatted argument so the statement prints the same text
    # under Python 2 and Python 3 (original used a py2-only print statement).
    print("%s %s %s %s" % (slope, offset, corr, N))
    if plot:
        from matplotlib import pyplot as plt
        plt.plot([-1, 4], [-1, 4], "g-")
        plt.plot(flex.log10(val0), flex.log10(slope * val1), "r,")
        plt.show()
    return slope
def scale(self, other):
    """Scale *other* against self.f_model_real on their shared reflections.

    Matches Miller indices between the two arrays, fits a linear scale via
    xfel.cxi.cxi_cc.correlation, and returns the slope. Correlation stats
    are printed as a side effect; the matplotlib diagnostics are disabled
    unless `plot` is flipped to True.
    """
    from cctbx import miller
    matches = miller.match_indices(self.f_model_real.indices(), other.indices())
    sel0 = flex.size_t([p[0] for p in matches.pairs()])
    sel1 = flex.size_t([p[1] for p in matches.pairs()])
    val0 = self.f_model_real.data().select(sel0)
    val1 = other.data().select(sel1)
    plot = False  # debug-only visual check of the scaling
    if plot:
        from matplotlib import pyplot as plt
        plt.plot([-1, 4], [-1, 4], "g-")
        plt.plot(flex.log10(val0), flex.log10(val1), "r.")
        plt.show()
    from xfel.cxi.cxi_cc import correlation
    slope, offset, corr, N = correlation(
        self=self.f_model_real.select(sel0),
        other=other.select(sel1))
    # FIX: the original `print slope, offset, corr, N` is Python-2-only
    # syntax; this form emits identical output on Python 2 and 3.
    print("%s %s %s %s" % (slope, offset, corr, N))
    if plot:
        from matplotlib import pyplot as plt
        plt.plot([-1, 4], [-1, 4], "g-")
        plt.plot(flex.log10(val0), flex.log10(slope * val1), "r,")
        plt.show()
    return slope
def i_over_sig_i_vs_i_plot(intensities, sigmas):
    """Plot unscaled I / sigma_adjusted vs unscaled I.

    Parameters
    ----------
    intensities, sigmas : flex.double arrays of equal length; non-positive
        entries in either are discarded before binning.

    Returns
    -------
    dict : a plotly-style figure description keyed "i_over_sig_i_vs_i",
        containing a 200x200 2D histogram heatmap of log10(I) vs I/sigma.
    """
    # Keep only reflections with strictly positive intensity and sigma so
    # that log10 and the ratio are well defined.
    sel = (intensities > 0) & (sigmas > 0)
    intensities = intensities.select(sel)
    sigmas = sigmas.select(sel)
    x = flex.log10(intensities)
    y = intensities / sigmas
    H, xedges, yedges = np.histogram2d(
        x.as_numpy_array(), y.as_numpy_array(), bins=(200, 200))
    nonzeros = np.nonzero(H)
    # Empty bins become NaN so the heatmap renders them as blank rather
    # than as zero-count cells.
    z = np.empty(H.shape)
    # FIX: np.NAN alias was removed in NumPy 2.0; np.nan is the supported
    # spelling on all NumPy versions.
    z[:] = np.nan
    z[nonzeros] = H[nonzeros]
    return {
        "i_over_sig_i_vs_i": {
            "data": [{
                "x": xedges.tolist(),
                "y": yedges.tolist(),
                # histogram2d returns x-major counts; transpose for plotly's
                # row-major z layout.
                "z": z.transpose().tolist(),
                "type": "heatmap",
                "name": "Isigma distribution",
                "colorbar": {
                    "title": "Number of reflections",
                    "titleside": "right",
                },
                "colorscale": "Jet",
            }],
            "layout": {
                "title": u"I/σ(I) vs I",
                "xaxis": {
                    "title": "log I"
                },
                "yaxis": {
                    "title": u"I/σ(I)"
                },
            },
            "help": u"""\
This plot shows the distribution of I/σ(I) as a function of I, which can
give indication of the errors within the dataset. The I/σ(I) asymptotic
limit can be seen at the plateau in the top-right of the plot, if the measured
data are strong enough.

[1] Diederichs, K. (2010). Acta Cryst. D, 66(6), 733-740.
https://doi.org/10.1107/S0907444910014836
""",
        }
    }
def __init__(self, reflections, step_size=45, tolerance=1.5,
             max_height_fraction=0.25, percentile=None,
             histogram_binning='linear', nn_per_bin=5):
    """Estimate the maximum unit cell edge from a nearest-neighbour
    analysis of the reciprocal-lattice points in `reflections`.

    Reflections are partitioned per imageset, per phi wedge of
    `step_size` degrees, and per entering/exiting flag; within each
    subset the nearest-neighbour distance of each rlp gives a direct
    space distance (1/d).  The max cell is then taken either from a
    histogram of those distances (bins above `max_height_fraction` of
    the tallest bin) or, if `percentile` is given, from the
    nth-percentile distance, scaled by `tolerance` in both cases.
    """
    self.tolerance = tolerance # Margin of error for max unit cell estimate
    from scitbx.array_family import flex
    NEAR = 10  # minimum number of NN distances required for the analysis
    self.NNBIN = nn_per_bin # target number of neighbors per histogram bin
    self.histogram_binning = histogram_binning

    # Accumulates direct-space NN distances over all subsets.
    direct = flex.double()

    if 'entering' in reflections:
        entering_flags = reflections['entering']
    else:
        # No entering column: treat every reflection as "entering".
        entering_flags = flex.bool(reflections.size(), True)
    rs_vectors = reflections['rlp']
    # Third component of xyzobs.mm.value is phi in radians; convert to deg.
    phi_deg = reflections['xyzobs.mm.value'].parts()[2] * (180 / math.pi)

    d_spacings = flex.double()
    # nearest neighbor analysis
    from annlib_ext import AnnAdaptor
    for imageset_id in range(flex.max(reflections['imageset_id']) + 1):
        sel_imageset = reflections['imageset_id'] == imageset_id
        if sel_imageset.count(True) == 0:
            continue
        phi_min = flex.min(phi_deg.select(sel_imageset))
        phi_max = flex.max(phi_deg.select(sel_imageset))
        d_phi = phi_max - phi_min
        n_steps = max(int(math.ceil(d_phi / step_size)), 1)

        for n in range(n_steps):
            # Fresh selection per phi wedge (does not mutate sel_imageset).
            sel_step = sel_imageset & (phi_deg >= (phi_min + n * step_size)) & (
                phi_deg < (phi_min + (n + 1) * step_size))

            for entering in (True, False):
                # Fresh selection per entering flag within the wedge.
                sel_entering = sel_step & (entering_flags == entering)
                if sel_entering.count(True) == 0:
                    continue

                query = flex.double()
                query.extend(rs_vectors.select(sel_entering).as_double())

                if query.size() == 0:
                    continue

                # k=1 nearest neighbour in 3D reciprocal space.
                IS_adapt = AnnAdaptor(data=query, dim=3, k=1)
                IS_adapt.query(query)

                # Reciprocal distance -> direct-space distance.
                direct.extend(1 / flex.sqrt(IS_adapt.distances))
                d_spacings.extend(1 / rs_vectors.norms())

    assert len(direct) > NEAR, (
        "Too few spots (%d) for nearest neighbour analysis." % len(direct))

    # Sort distances ascending, keeping d_spacings in step.
    perm = flex.sort_permutation(direct)
    direct = direct.select(perm)
    d_spacings = d_spacings.select(perm)

    # eliminate nonsensical direct space distances
    sel = direct > 1
    direct = direct.select(sel)
    d_spacings = d_spacings.select(sel)

    if percentile is None:
        # reject top 1% of longest distances to hopefully get rid of any outliers
        n = int(math.floor(0.99 * len(direct)))
        direct = direct[:n]
        d_spacings = d_spacings[:n]

    # determine the most probable nearest neighbor distance (direct space)
    if self.histogram_binning == 'log':
        hst = flex.histogram(flex.log10(direct),
                             n_slots=int(len(direct) / self.NNBIN))
    else:
        hst = flex.histogram(direct, n_slots=int(len(direct) / self.NNBIN))
    centers = hst.slot_centers()  # NOTE(review): unused local
    if self.histogram_binning == 'log':
        # Map log10-space bin edges back to linear distances.
        self.slot_start = flex.double(
            [10**(s - 0.5 * hst.slot_width()) for s in hst.slot_centers()])
        self.slot_end = flex.double(
            [10**(s + 0.5 * hst.slot_width()) for s in hst.slot_centers()])
        self.slot_width = self.slot_end - self.slot_start
    else:
        self.slot_start = hst.slot_centers() - 0.5 * hst.slot_width()
        self.slot_end = hst.slot_centers() + 0.5 * hst.slot_width()
        self.slot_width = hst.slot_width()
    # Counts normalised by (possibly per-bin) width.
    self.relative_frequency = hst.slots().as_double() / self.slot_width
    highest_bin_height = flex.max(self.relative_frequency)

    if False:  # to print out the histogramming analysis
        smin, smax = flex.min(direct), flex.max(direct)
        stats = flex.mean_and_variance(direct)
        import sys
        out = sys.stdout
        print >> out, " range: %6.2f - %.2f" % (smin, smax)
        print >> out, " mean: %6.2f +/- %6.2f on N = %d" % (
            stats.mean(), stats.unweighted_sample_standard_deviation(),
            direct.size())
        hst.show(f=out, prefix=" ", format_cutoffs="%6.2f")
        print >> out, ""

    if percentile is not None:
        # determine the nth-percentile direct-space distance
        perm = flex.sort_permutation(direct, reverse=True)
        self.max_cell = self.tolerance * direct[perm[int(
            (1 - percentile) * len(direct))]]
    else:
        # choose a max cell based on bins above a given fraction of the
        # highest bin height
        # given multiple
        isel = (self.relative_frequency.as_double() > (
            max_height_fraction * highest_bin_height)).iselection()
        self.max_cell = (
            self.tolerance * self.slot_end[int(flex.max(isel.as_double()))])

    self.reciprocal_lattice_vectors = rs_vectors
    self.d_spacings = d_spacings
    self.direct = direct
    self.histogram = hst
def __init__(self, reflections, step_size=45, tolerance=1.5,
             max_height_fraction=0.25, percentile=0.05,
             histogram_binning='linear'):
    """Estimate the maximum unit cell edge from a nearest-neighbour
    analysis of the reciprocal-lattice points in `reflections`.

    Reflections are split per imageset, per phi wedge of `step_size`
    degrees, and per entering/exiting flag; within each subset the
    nearest-neighbour distance of each rlp gives a direct-space
    distance (1/d).  Sets self.max_cell (histogram-based, scaled by
    `tolerance`) and self.percentile (the `percentile` fraction of the
    longest distances).
    """
    self.tolerance = tolerance  # Margin of error for max unit cell estimate
    from scitbx.array_family import flex
    NEAR = 10  # minimum number of NN distances required for the analysis
    self.NNBIN = 5  # target number of neighbors per histogram bin
    self.histogram_binning = histogram_binning

    direct = flex.double()

    if 'entering' in reflections:
        entering_flags = reflections['entering']
    else:
        # No entering column: treat every reflection as "entering".
        entering_flags = flex.bool(reflections.size(), True)
    rs_vectors = reflections['rlp']
    # Third component of xyzobs.mm.value is phi in radians; convert to deg.
    phi_deg = reflections['xyzobs.mm.value'].parts()[2] * (180 / math.pi)

    d_spacings = flex.double()
    # nearest neighbor analysis
    from annlib_ext import AnnAdaptor
    for imageset_id in range(flex.max(reflections['imageset_id']) + 1):
        # BUG FIX: the original narrowed one `sel` in place with `&=` inside
        # the wedge and entering loops, so after the first
        # (wedge 0, entering=True) pass the selection was permanently empty
        # and every remaining subset was silently skipped.  Build a fresh
        # selection per wedge and per entering flag instead.
        sel_imageset = reflections['imageset_id'] == imageset_id
        if sel_imageset.count(True) == 0:
            continue
        phi_min = flex.min(phi_deg.select(sel_imageset))
        phi_max = flex.max(phi_deg.select(sel_imageset))
        d_phi = phi_max - phi_min
        n_steps = max(int(math.ceil(d_phi / step_size)), 1)

        for n in range(n_steps):
            sel_step = sel_imageset & (phi_deg >= (phi_min + n * step_size)) & (
                phi_deg < (phi_min + (n + 1) * step_size))

            for entering in (True, False):
                sel_entering = sel_step & (entering_flags == entering)
                if sel_entering.count(True) == 0:
                    continue

                query = flex.double()
                query.extend(rs_vectors.select(sel_entering).as_double())

                if query.size() == 0:
                    continue

                # k=1 nearest neighbour in 3D reciprocal space.
                IS_adapt = AnnAdaptor(data=query, dim=3, k=1)
                IS_adapt.query(query)

                # Reciprocal distance -> direct-space distance.
                direct.extend(1 / flex.sqrt(IS_adapt.distances))
                d_spacings.extend(1 / rs_vectors.norms())

    assert len(direct) > NEAR, (
        "Too few spots (%d) for nearest neighbour analysis." % len(direct))

    # Sort distances ascending, keeping d_spacings in step.
    perm = flex.sort_permutation(direct)
    direct = direct.select(perm)
    d_spacings = d_spacings.select(perm)

    # reject top 1% of longest distances to hopefully get rid of any outliers
    n = int(math.floor(0.99 * len(direct)))
    direct = direct[:n]
    d_spacings = d_spacings[:n]

    # determine the most probable nearest neighbor distance (direct space)
    if self.histogram_binning == 'log':
        hst = flex.histogram(flex.log10(direct),
                             n_slots=int(len(direct) / self.NNBIN))
    else:
        hst = flex.histogram(direct, n_slots=int(len(direct) / self.NNBIN))
    if self.histogram_binning == 'log':
        # Map log10-space bin edges back to linear distances.
        self.slot_start = flex.double(
            [10**(s - 0.5 * hst.slot_width()) for s in hst.slot_centers()])
        self.slot_end = flex.double(
            [10**(s + 0.5 * hst.slot_width()) for s in hst.slot_centers()])
        self.slot_width = self.slot_end - self.slot_start
    else:
        self.slot_start = hst.slot_centers() - 0.5 * hst.slot_width()
        self.slot_end = hst.slot_centers() + 0.5 * hst.slot_width()
        self.slot_width = hst.slot_width()
    # Counts normalised by (possibly per-bin) width.
    self.relative_frequency = hst.slots().as_double() / self.slot_width
    highest_bin_height = flex.max(self.relative_frequency)

    if False:  # to print out the histogramming analysis
        smin, smax = flex.min(direct), flex.max(direct)
        stats = flex.mean_and_variance(direct)
        import sys
        out = sys.stdout
        # out.write instead of the py2-only `print >> out` statement.
        out.write(" range: %6.2f - %.2f\n" % (smin, smax))
        out.write(" mean: %6.2f +/- %6.2f on N = %d\n" % (
            stats.mean(), stats.unweighted_sample_standard_deviation(),
            direct.size()))
        hst.show(f=out, prefix=" ", format_cutoffs="%6.2f")
        out.write("\n")

    # choose a max cell based on bins above a given fraction of the highest
    # bin height
    # given multiple
    isel = (self.relative_frequency.as_double() > (
        max_height_fraction * highest_bin_height)).iselection()
    self.max_cell = (
        self.tolerance * self.slot_end[int(flex.max(isel.as_double()))])

    # determine the 5th-percentile direct-space distance
    perm = flex.sort_permutation(direct, reverse=True)
    self.percentile = direct[perm[int(percentile * len(direct))]]

    self.reciprocal_lattice_vectors = rs_vectors
    self.d_spacings = d_spacings
    self.direct = direct
    self.histogram = hst