def r_split(self, other, assume_index_matching=False, use_binning=False):
    """Compute the R_split statistic between two half data sets.

    R_split = sqrt(2) * sum(|I_a - I_b|) / sum(|I_a + I_b|)

    Used in Boutet et al. (2012), which credit it to Owen et al
    (2006). See also R_mrgd_I in Diederichs & Karplus (1997)?
    Barends cites Collaborative Computational Project Number 4. The
    CCP4 suite: programs for protein crystallography. Acta
    Crystallogr. Sect. D-Biol. Crystallogr. 50, 760-763 (1994) and
    White, T. A. et al. CrystFEL: a software suite for snapshot
    serial crystallography. J. Appl. Cryst. 45, 335-341 (2012).

    :param other: the second half data set; must contain the same
        number of indices as this one
    :param assume_index_matching: if True, treat self and other as
        already index-matched instead of calling common_sets()
    :param use_binning: if True, return a binned_data object holding
        one R_split value per resolution bin
    :return: a float, None if this data set is empty, or binned_data
        when use_binning is True
    """
    if not use_binning:
        assert other.indices().size() == self.indices().size()
        if self.data().size() == 0:
            return None
        if assume_index_matching:
            (o, c) = (self, other)
        else:
            (o, c) = self.common_sets(other=other, assert_no_singles=True)
        # The case where the denominator is less or equal to zero is
        # pathological and should never arise in practice.
        den = flex.sum(flex.abs(o.data() + c.data()))
        assert den > 0
        return math.sqrt(2) * flex.sum(flex.abs(o.data() - c.data())) / den
    # Bug fix: the original asserted "self.binner is not None", which tests
    # the bound method object and is therefore always true.  The binner
    # itself must have been set up before binned statistics are possible.
    assert self.binner() is not None
    results = []
    for i_bin in self.binner().range_all():
        sel = self.binner().selection(i_bin)
        results.append(
            r_split(self.select(sel), other.select(sel),
                    assume_index_matching=assume_index_matching,
                    use_binning=False))
    return binned_data(binner=self.binner(), data=results, data_fmt='%7.4f')
def r_split(self, other, assume_index_matching=False, use_binning=False):
    """Compute the R_split statistic between two half data sets.

    R_split = sqrt(2) * sum(|I_a - I_b|) / sum(|I_a + I_b|)

    Used in Boutet et al. (2012), which credit it to Owen et al
    (2006). See also R_mrgd_I in Diederichs & Karplus (1997)?
    Barends cites Collaborative Computational Project Number 4. The
    CCP4 suite: programs for protein crystallography. Acta
    Crystallogr. Sect. D-Biol. Crystallogr. 50, 760-763 (1994) and
    White, T. A. et al. CrystFEL: a software suite for snapshot
    serial crystallography. J. Appl. Cryst. 45, 335-341 (2012).

    :param other: the second half data set; must contain the same
        number of indices as this one
    :param assume_index_matching: if True, treat self and other as
        already index-matched instead of calling common_sets()
    :param use_binning: if True, return a binned_data object holding
        one R_split value per resolution bin
    :return: a float, None if this data set is empty, or binned_data
        when use_binning is True
    """
    if not use_binning:
        assert other.indices().size() == self.indices().size()
        if self.data().size() == 0:
            return None
        if assume_index_matching:
            (o, c) = (self, other)
        else:
            (o, c) = self.common_sets(other=other, assert_no_singles=True)
        # The case where the denominator is less or equal to zero is
        # pathological and should never arise in practice.
        den = flex.sum(flex.abs(o.data() + c.data()))
        assert den > 0
        return math.sqrt(2) * flex.sum(flex.abs(o.data() - c.data())) / den
    # Bug fix: "assert self.binner is not None" checked the bound method
    # object (always truthy); the intended check is that a binner has
    # actually been configured on this array.
    assert self.binner() is not None
    results = []
    for i_bin in self.binner().range_all():
        sel = self.binner().selection(i_bin)
        results.append(
            r_split(self.select(sel), other.select(sel),
                    assume_index_matching=assume_index_matching,
                    use_binning=False)
        )
    return binned_data(binner=self.binner(), data=results, data_fmt="%7.4f")
def r1_factor(self, other, scale_factor=None, assume_index_matching=False, use_binning=False):
    r"""Get the R1 factor according to this formula

    .. math::
       R1 = \dfrac{\sum{||F| - k|F'||}}{\sum{|F|}}

    where F is self.data() and F' is other.data() and
    k is the factor to put F' on the same scale as F

    :param other: data set to compare against; must contain the same
        number of indices as this one
    :param scale_factor: explicit scale k; if None, the least-squares
        scale is computed (per bin when use_binning is True, in which
        case scale_factor is expected to be a binned_data object)
    :param assume_index_matching: if True, skip common_sets() matching
    :param use_binning: if True, return a binned_data object with one
        R1 value per resolution bin
    :return: a float, None if this data set is empty, or binned_data

    Note: the docstring is a raw string so that the LaTeX escapes
    (\dfrac, \sum) are not interpreted as (invalid) string escapes.
    """
    assert not use_binning or self.binner() is not None
    assert other.indices().size() == self.indices().size()
    if not use_binning:
        if self.data().size() == 0:
            return None
        if assume_index_matching:
            o, c = self, other
        else:
            o, c = self.common_sets(other=other, assert_no_singles=True)
        o = flex.abs(o.data())
        c = flex.abs(c.data())
        if scale_factor is None:
            # Least-squares scale: k = sum(o*c) / sum(c*c), guarded
            # against an all-zero comparison set.
            den = flex.sum(c * c)
            if den != 0:
                c *= flex.sum(o * c) / den
        else:
            # (was "elif scale_factor is not None" — redundant condition)
            c *= scale_factor
        return flex.sum(flex.abs(o - c)) / flex.sum(o)
    results = []
    for i_bin in self.binner().range_all():
        sel = self.binner().selection(i_bin)
        # Robustness fix: the original dereferenced scale_factor.data
        # unconditionally, raising AttributeError when scale_factor is
        # None (its documented default) in the binned path.
        bin_scale = scale_factor.data[i_bin] if scale_factor is not None else None
        results.append(r1_factor(self.select(sel), other.select(sel),
                                 bin_scale, assume_index_matching))
    return binned_data(binner=self.binner(), data=results, data_fmt="%7.4f")
def binned_correlation(self, other, include_negatives=False):
    """Per-resolution-bin correlation between this data set and other.

    Returns a pair of binned_data objects: the correlation coefficient
    per bin and the contributing observation count per bin.  Bins with
    no selected reflections contribute 0 to both.
    """
    cc_per_bin = []
    count_per_bin = []
    binner = self.binner()
    for bin_index in binner.range_all():
        bin_sel = binner.selection(bin_index)
        if bin_sel.count(True) == 0:
            # Nothing in this bin: record zeros and move on.
            cc_per_bin.append(0.0)
            count_per_bin.append(0.0)
        else:
            stats = correlation(self.select(bin_sel), other.select(bin_sel), include_negatives)
            cc_per_bin.append(stats[2])
            count_per_bin.append(stats[3])
            # plots for debugging
            #from matplotlib import pyplot as plt
            #plt.plot(flex.log(self.select(bin_sel).data()),flex.log(other.select(bin_sel).data()),"b.")
            #plt.show()
    return (binned_data(binner=binner, data=cc_per_bin, data_fmt="%7.4f"),
            binned_data(binner=binner, data=count_per_bin, data_fmt="%7d"))
def binned_correlation(self, other, include_negatives=False):
    """Correlate this data set with other, one value per resolution bin.

    :return: tuple of two binned_data objects — (correlation per bin,
        observation count per bin); empty bins yield 0 for both.
    """
    corr_values = []
    obs_counts = []
    for i in self.binner().range_all():
        mask = self.binner().selection(i)
        if mask.count(True) == 0:
            corr_values.append(0.0)
            obs_counts.append(0.0)
            continue
        stats = correlation(self.select(mask), other.select(mask), include_negatives)
        # stats[2] is the correlation coefficient, stats[3] the count.
        corr_values.append(stats[2])
        obs_counts.append(stats[3])
        # plots for debugging
        # from matplotlib import pyplot as plt
        # plt.plot(flex.log(self.select(mask).data()),flex.log(other.select(mask).data()),"b.")
        # plt.show()
    first = binned_data(binner=self.binner(), data=corr_values, data_fmt="%7.4f")
    second = binned_data(binner=self.binner(), data=obs_counts, data_fmt="%7d")
    return (first, second)
def scale_factor(self, this, other, weights=None, cutoff_factor=None, use_binning=False):
    """
    The analytical expression for the least squares scale factor.

    K = sum(w * yo * yc) / sum(w * yc^2)

    If the optional cutoff_factor argument is provided, only the
    reflections whose magnitudes are greater than cutoff_factor *
    max(yo) will be included in the calculation.  With use_binning,
    a binned_data object holding one K per resolution bin is
    returned and cutoff_factor must be None.
    """
    assert not use_binning or this.binner() is not None
    if use_binning:
        assert cutoff_factor is None
    assert other.size() == this.data().size()
    if use_binning:
        # Binned path: compute one scale factor per resolution bin.
        per_bin = []
        for bin_no in this.binner().range_all():
            bin_sel = this.binner().selection(bin_no)
            bin_weights = weights.select(bin_sel) if weights is not None else None
            per_bin.append(
                self.scale_factor(this.select(bin_sel), other.select(bin_sel), bin_weights))
        return binned_data(binner=this.binner(), data=per_bin, data_fmt="%7.4f")
    if this.data().size() == 0:
        return None
    y_obs = this.data()
    y_calc = other.data()
    if cutoff_factor is not None:
        assert cutoff_factor < 1
        # Keep only the strong reflections above the magnitude cutoff.
        keep = y_obs >= flex.max(this.data()) * cutoff_factor
        y_obs = y_obs.select(keep)
        y_calc = y_calc.select(keep)
        if weights is not None:
            weights = weights.select(keep)
    if weights is None:
        return flex.sum(y_obs * y_calc) / flex.sum(flex.pow2(y_calc))
    return flex.sum(weights * y_obs * y_calc) / flex.sum(weights * flex.pow2(y_calc))
def r1_factor(self, this, other, scale_factor=None, assume_index_matching=False, use_binning=False):
    r"""Get the R1 factor according to this formula

    .. math::
       R1 = \dfrac{\sum{||F| - k|F'||}}{\sum{|F|}}

    where F is this.data() and F' is other.data() and
    k is the factor to put F' on the same scale as F

    :param this: the reference data set (F)
    :param other: the data set to compare (F'); must contain the same
        number of indices as this
    :param scale_factor: explicit scale k; if None, the least-squares
        scale is computed (per bin when use_binning is True, in which
        case scale_factor is expected to be a binned_data object)
    :param assume_index_matching: if True, skip common_sets() matching
    :param use_binning: if True, return a binned_data object with one
        R1 value per resolution bin
    :return: a float, None if this data set is empty, or binned_data

    Note: the docstring is a raw string so that the LaTeX escapes
    (\dfrac, \sum) are not interpreted as (invalid) string escapes.
    """
    assert not use_binning or this.binner() is not None
    assert other.indices().size() == this.indices().size()
    if not use_binning:
        if this.data().size() == 0:
            return None
        if assume_index_matching:
            o, c = this, other
        else:
            o, c = this.common_sets(other=other, assert_no_singles=True)
        o = flex.abs(o.data())
        c = flex.abs(c.data())
        if scale_factor is None:
            # Least-squares scale: k = sum(o*c) / sum(c*c), guarded
            # against an all-zero comparison set.
            den = flex.sum(c * c)
            if den != 0:
                c *= flex.sum(o * c) / den
        else:
            # (was "elif scale_factor is not None" — redundant condition)
            c *= scale_factor
        return flex.sum(flex.abs(o - c)) / flex.sum(o)
    results = []
    for i_bin in this.binner().range_all():
        sel = this.binner().selection(i_bin)
        # Robustness fix: the original dereferenced scale_factor.data
        # unconditionally, raising AttributeError when scale_factor is
        # None (its documented default) in the binned path.
        bin_scale = scale_factor.data[i_bin] if scale_factor is not None else None
        results.append(
            self.r1_factor(this.select(sel), other.select(sel),
                           bin_scale, assume_index_matching))
    return binned_data(binner=this.binner(), data=results, data_fmt="%7.4f")
def scale_factor(self, other, weights=None, cutoff_factor=None, use_binning=False):
    """Analytical least-squares scale factor between two data sets.

    K = sum(w * yo * yc) / sum(w * yc^2)

    If the optional cutoff_factor argument is provided, only the
    reflections whose magnitudes are greater than cutoff_factor *
    max(yo) will be included in the calculation.  With use_binning,
    returns a binned_data object (cutoff_factor must then be None).
    """
    assert not use_binning or self.binner() is not None
    if use_binning:
        assert cutoff_factor is None
    assert other.size() == self.data().size()
    if not use_binning:
        if self.data().size() == 0:
            return None
        yo, yc = self.data(), other.data()
        if cutoff_factor is not None:
            assert cutoff_factor < 1
            # Restrict the fit to reflections above the magnitude cutoff.
            strong = yo >= flex.max(self.data()) * cutoff_factor
            yo = yo.select(strong)
            yc = yc.select(strong)
            weights = weights.select(strong) if weights is not None else None
        if weights is None:
            return flex.sum(yo * yc) / flex.sum(flex.pow2(yc))
        return flex.sum(weights * yo * yc) / flex.sum(weights * flex.pow2(yc))
    # Binned path: one least-squares scale per resolution bin.
    scales = []
    for bin_no in self.binner().range_all():
        bin_sel = self.binner().selection(bin_no)
        scales.append(
            scale_factor(self.select(bin_sel), other.select(bin_sel),
                         weights.select(bin_sel) if weights is not None else None))
    return binned_data(binner=self.binner(), data=scales, data_fmt="%7.4f")
def split_sigma_test(self, other, scale, use_binning=False, show_plot=False):
    """
    Calculates the split sigma ratio test by Peter Zwart:
    ssr = sum( (Iah-Ibh)^2 ) / sum( sigma_ah^2 + sigma_bh^2)

    where Iah and Ibh are merged intensities for a given hkl from two halves of
    a dataset (a and b). Likewise for sigma_ah and sigma_bh.

    ssr (split sigma ratio) should approximately equal 1 if the errors are correctly estimated.

    :param other: the second half data set; same size and index order as self
    :param scale: relative scale applied to other (a binned_data-like object
        exposing .data per bin when use_binning is True — TODO confirm)
    :param use_binning: if True, return a binned_data object with one ssr
        (or None for empty bins) per resolution bin
    :param show_plot: if True, produce diagnostic plots via ccp4_model
    """
    # Both halves must carry exactly the same reflections in the same order.
    assert other.size() == self.data().size()
    assert (self.indices() == other.indices()).all_eq(True)
    assert not use_binning or self.binner() is not None
    if use_binning:
        results = []
        for i_bin in self.binner().range_all():
            sel = self.binner().selection(i_bin)
            i_self = self.select(sel)
            i_other = other.select(sel)
            # Per-bin relative scale supplied by the caller.
            scale_rel = scale.data[i_bin]
            if i_self.size() == 0:
                # Empty bin: no ratio can be formed.
                results.append(None)
            else:
                results.append(
                    split_sigma_test(i_self, i_other, scale=scale_rel, show_plot=show_plot))
        return binned_data(binner=self.binner(), data=results, data_fmt="%7.4f")
    a_data = self.data()
    b_data = scale * other.data()
    a_sigmas = self.sigmas()
    b_sigmas = scale * other.sigmas()
    if show_plot:
        # NOTE(review): the triple-quoted block below is an inert string
        # literal holding dead diagnostic code (Python-2 print/xrange
        # syntax); it is evaluated and discarded, never executed.
        """
        # Diagnostic use of the (I - <I>) / sigma distribution, should have mean=0, std=1
        a_variance = a_sigmas * a_sigmas
        b_variance = b_sigmas * b_sigmas
        mean_num = (a_data/ (a_variance) ) + (b_data/ (b_variance) )
        mean_den = (1./ (a_variance) ) + (1./ (b_variance) )
        mean_values = mean_num / mean_den

        delta_I_a = a_data - mean_values
        normal_a = delta_I_a / (a_sigmas)
        stats_a = flex.mean_and_variance(normal_a)
        print "\nA mean %7.4f std %7.4f"%(stats_a.mean(),stats_a.unweighted_sample_standard_deviation())
        order_a = flex.sort_permutation(normal_a)

        delta_I_b = b_data - mean_values
        normal_b = delta_I_b / (b_sigmas)
        stats_b = flex.mean_and_variance(normal_b)
        print "B mean %7.4f std %7.4f"%(stats_b.mean(),stats_b.unweighted_sample_standard_deviation())
        order_b = flex.sort_permutation(normal_b)

        # plots for debugging
        from matplotlib import pyplot as plt
        plt.plot(xrange(len(order_a)),normal_a.select(order_a),"b.")
        plt.plot(xrange(len(order_b)),normal_b.select(order_b),"r.")
        plt.show()
        """
        from cctbx.examples.merging.sigma_correction import ccp4_model
        Correction = ccp4_model()
        Correction.plots(a_data, b_data, a_sigmas, b_sigmas)
        #a_new_variance,b_new_variance = Correction.optimize(a_data, b_data, a_sigmas, b_sigmas)
        #Correction.plots(a_data, b_data, flex.sqrt(a_new_variance), flex.sqrt(b_new_variance))
    # ssr numerator: (Ia - Ib)^2; denominator: sigma_a^2 + sigma_b^2.
    n = flex.pow(a_data - b_data, 2)
    d = flex.pow(a_sigmas, 2) + flex.pow(b_sigmas, 2)
    return flex.sum(n) / flex.sum(d)
def split_sigma_test(self, other, scale, use_binning=False, show_plot=False):
    """
    Calculates the split sigma ratio test by Peter Zwart:
    ssr = sum( (Iah-Ibh)^2 ) / sum( sigma_ah^2 + sigma_bh^2)

    where Iah and Ibh are merged intensities for a given hkl from two halves of
    a dataset (a and b). Likewise for sigma_ah and sigma_bh.

    ssr (split sigma ratio) should approximately equal 1 if the errors are correctly estimated.

    :param other: the second half data set; same size and index order as self
    :param scale: relative scale applied to other (a binned_data-like object
        exposing .data per bin when use_binning is True — TODO confirm)
    :param use_binning: if True, return a binned_data object with one ssr
        (or None for empty bins) per resolution bin
    :param show_plot: if True, produce diagnostic plots via ccp4_model
    """
    # Both halves must carry exactly the same reflections in the same order.
    assert other.size() == self.data().size()
    assert (self.indices() == other.indices()).all_eq(True)
    assert not use_binning or self.binner() is not None
    if use_binning:
        results = []
        for i_bin in self.binner().range_all():
            sel = self.binner().selection(i_bin)
            i_self = self.select(sel)
            i_other = other.select(sel)
            # Per-bin relative scale supplied by the caller.
            scale_rel = scale.data[i_bin]
            if i_self.size() == 0:
                # Empty bin: no ratio can be formed.
                results.append(None)
            else:
                results.append(split_sigma_test(i_self, i_other, scale=scale_rel, show_plot=show_plot))
        return binned_data(binner=self.binner(), data=results, data_fmt="%7.4f")
    a_data = self.data()
    b_data = scale * other.data()
    a_sigmas = self.sigmas()
    b_sigmas = scale * other.sigmas()
    if show_plot:
        # NOTE(review): the triple-quoted block below is an inert string
        # literal holding dead diagnostic code (Python-2 print/xrange
        # syntax); it is evaluated and discarded, never executed.
        """
        # Diagnostic use of the (I - <I>) / sigma distribution, should have mean=0, std=1
        a_variance = a_sigmas * a_sigmas
        b_variance = b_sigmas * b_sigmas
        mean_num = (a_data/ (a_variance) ) + (b_data/ (b_variance) )
        mean_den = (1./ (a_variance) ) + (1./ (b_variance) )
        mean_values = mean_num / mean_den

        delta_I_a = a_data - mean_values
        normal_a = delta_I_a / (a_sigmas)
        stats_a = flex.mean_and_variance(normal_a)
        print "\nA mean %7.4f std %7.4f"%(stats_a.mean(),stats_a.unweighted_sample_standard_deviation())
        order_a = flex.sort_permutation(normal_a)

        delta_I_b = b_data - mean_values
        normal_b = delta_I_b / (b_sigmas)
        stats_b = flex.mean_and_variance(normal_b)
        print "B mean %7.4f std %7.4f"%(stats_b.mean(),stats_b.unweighted_sample_standard_deviation())
        order_b = flex.sort_permutation(normal_b)

        # plots for debugging
        from matplotlib import pyplot as plt
        plt.plot(xrange(len(order_a)),normal_a.select(order_a),"b.")
        plt.plot(xrange(len(order_b)),normal_b.select(order_b),"r.")
        plt.show()
        """
        from cctbx.examples.merging.sigma_correction import ccp4_model
        Correction = ccp4_model()
        Correction.plots(a_data, b_data, a_sigmas, b_sigmas)
        # a_new_variance,b_new_variance = Correction.optimize(a_data, b_data, a_sigmas, b_sigmas)
        # Correction.plots(a_data, b_data, flex.sqrt(a_new_variance), flex.sqrt(b_new_variance))
    # ssr numerator: (Ia - Ib)^2; denominator: sigma_a^2 + sigma_b^2.
    n = flex.pow(a_data - b_data, 2)
    d = flex.pow(a_sigmas, 2) + flex.pow(b_sigmas, 2)
    return flex.sum(n) / flex.sum(d)