def print_stats_on_matches(self):
    """Log a five-number summary of the matched 2theta residuals and weights.

    The matches come from ``self.get_matches()``. When there are none, a
    warning is logged and nothing else is reported. Residuals are multiplied
    by ``RAD2DEG`` and weights by ``DEG2RAD**2`` before formatting.
    """
    matches = self.get_matches()
    nref = len(matches)
    if nref == 0:
        # Check for the empty case explicitly instead of wrapping the whole
        # table construction in ``except IndexError``, which would also hide
        # unrelated indexing errors.
        logger.warning(
            "Unable to calculate summary statistics for zero observations")
        return

    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary

    twotheta_resid = matches['2theta_resid']
    w_2theta = matches['2theta.weights']

    msg = "\nSummary statistics for {0} observations".format(nref) +\
          " matched to predictions:"
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    row_data = five_number_summary(twotheta_resid)
    rows.append(["2theta_c - 2theta_o (deg)"] +
                ["%.4g" % (e * RAD2DEG) for e in row_data])
    row_data = five_number_summary(w_2theta)
    rows.append(["2theta weights"] +
                ["%.4g" % (e * DEG2RAD**2) for e in row_data])
    st = simple_table(rows, header)

    logger.info(msg)
    logger.info(st.format())
    logger.info("")
def print_stats_on_matches(self):
    """Report min/quartile/max statistics for the 2theta matches via the logger.

    If the summary cannot be computed because an IndexError is raised (the
    zero-length reflection list case), a warning is logged instead.
    """
    matches = self.get_matches()
    n_matched = len(matches)

    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary

    # (row label, data column, factor applied before formatting)
    columns = (
        ("2theta_c - 2theta_o (deg)", matches['2theta_resid'], RAD2DEG),
        ("2theta weights", matches['2theta.weights'], DEG2RAD**2),
    )

    title = "\nSummary statistics for {0} observations".format(n_matched) +\
            " matched to predictions:"
    table_rows = []
    try:
        for label, values, factor in columns:
            quantiles = five_number_summary(values)
            table_rows.append(
                [label] + ["%.4g" % (q * factor) for q in quantiles])
        table = simple_table(
            table_rows, ["", "Min", "Q1", "Med", "Q3", "Max"])
    except IndexError:  # zero length reflection list
        logger.warning(
            "Unable to calculate summary statistics for zero observations")
        return

    logger.info(title)
    logger.info(table.format())
    logger.info("")
def print_stats_on_matches(self):
    """Log five-number summaries of the matched 2theta residuals and weights.

    A warning is logged, and nothing else, when there are no matches.
    """
    matches = self.get_matches()
    n_matched = len(matches)
    if n_matched == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations")
        return

    # (row label, data column, factor applied before formatting)
    columns = (
        ("2theta_c - 2theta_o (deg)", matches["2theta_resid"], RAD2DEG),
        ("2theta weights", matches["2theta.weights"], DEG2RAD**2),
    )
    title = ("\nSummary statistics for {} observations".format(n_matched) +
             " matched to predictions:")

    table_rows = []
    for label, values, factor in columns:
        quantiles = five_number_summary(values)
        table_rows.append(
            [label] + ["%.4g" % (q * factor) for q in quantiles])
    table = simple_table(table_rows, ["", "Min", "Q1", "Med", "Q3", "Max"])

    logger.info(title)
    logger.info(table.format())
    logger.info("")
def print_stats_on_matches(self):
    """Log a table of five-number summaries for the 2theta matches.

    Logs a warning and returns early if ``self.get_matches()`` is empty.
    """
    matches = self.get_matches()
    n_matched = len(matches)
    if n_matched == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations"
        )
        return

    # (row label, data column, factor applied before formatting)
    columns = (
        ("2theta_c - 2theta_o (deg)", matches["2theta_resid"], RAD2DEG),
        ("2theta weights", matches["2theta.weights"], DEG2RAD ** 2),
    )
    title = (
        f"\nSummary statistics for {n_matched} observations"
        + " matched to predictions:"
    )

    table_rows = []
    for label, values, factor in columns:
        quantiles = five_number_summary(values)
        table_rows.append([label] + [f"{q * factor:.4g}" for q in quantiles])

    logger.info(title)
    logger.info(
        tabulate(table_rows, ["", "Min", "Q1", "Med", "Q3", "Max"]) + "\n")
def adjust_errors(self, dI_derrorterms=None, compute_sums=True):
    """Propagate errors to the scaled and merged intensity errors.

    This is statistical error propagation using 1) an estimate of the errors
    in the post-refined parameters from the observed population and 2) partial
    derivatives of the scaled intensity with respect to each of the
    post-refined parameters.

    Args:
        dI_derrorterms: sequence of partial derivatives. The first five
            entries are unpacked as the Iobs, thetax, thetay, lambda and deff
            terms; the rest are the gstar terms. Computed with
            ``self.dI_derrorterms()`` when None.
        compute_sums: when true, rebuild ``self.scaler.summed_weight`` and
            ``self.scaler.summed_wt_I`` from the propagated errors.

    Side effects:
        Overwrites the ``'isigi'`` column of ``self.scaler.ISIGI`` and, when
        ``self.verbose`` is set, writes a quartile table to ``self.log``.
    """
    assert self.scaler.params.postrefinement.algorithm == 'rs'

    refls = self.scaler.ISIGI

    if self.error_terms is None:
        self.initial_estimates()

    if dI_derrorterms is None:
        dI_derrorterms = self.dI_derrorterms()
    dI_dIobs, dI_dthetax, dI_dthetay, dI_dlambda, dI_deff = dI_derrorterms[0:5]
    dI_dgstar = dI_derrorterms[5:]
    sigma_Iobs = refls['scaled_intensity'] / refls['isigi']
    r = self.r
    n_gstar = len(self.error_terms.sigma_gstar)

    # Propagate errors
    refls['isigi'] = refls['scaled_intensity'] / \
        flex.sqrt((sigma_Iobs**2 * dI_dIobs**2) +
                  sum([self.error_terms.sigma_gstar[j]**2 * dI_dgstar[j]**2
                       for j in range(n_gstar)]) +
                  (self.error_terms.sigma_thetax**2 * dI_dthetax**2) +
                  (self.error_terms.sigma_thetay**2 * dI_dthetay**2) +
                  (self.error_terms.sigma_lambda**2 * dI_dlambda**2) +
                  (self.error_terms.sigma_deff**2 * dI_deff**2))

    if self.verbose:
        # Show results of propagation
        from scitbx.math import five_number_summary
        all_data = [
            (refls['iobs'], "Iobs"),
            (sigma_Iobs, "Original errors"),
            (1 / r['D'], "Total scale factor"),
            (refls['iobs'] / r['D'], "Inflated intensities"),
            (refls['scaled_intensity'] / refls['isigi'], "Propagated errors"),
            (flex.sqrt(sigma_Iobs**2 * dI_dIobs**2), "Iobs term"),
            (flex.sqrt(self.error_terms.sigma_thetax**2 * dI_dthetax**2),
             "Thetax term"),
            (flex.sqrt(self.error_terms.sigma_thetay**2 * dI_dthetay**2),
             "Thetay term"),
            (flex.sqrt(self.error_terms.sigma_lambda**2 * dI_dlambda**2),
             "Wavelength term"),
            (flex.sqrt(self.error_terms.sigma_deff**2 * dI_deff**2),
             "Deff term"),
        ] + [
            (flex.sqrt(self.error_terms.sigma_gstar[j]**2 * dI_dgstar[j]**2),
             "Gstar term %d" % j)
            for j in range(n_gstar)
        ]
        print("%20s % 20s % 20s % 20s" % ("Data name", "Quartile 1", "Median",
                                          "Quartile 3"), file=self.log)
        for data, title in all_data:
            fns = five_number_summary(data)
            # The %d conversions are kept from the original, so the
            # quartiles are printed as truncated integers.
            print("%20s % 20d % 20d % 20d" % (title, fns[1], fns[2], fns[3]),
                  file=self.log)

    if compute_sums:
        # Final terms for cxi.merge
        self.scaler.summed_weight = flex.double(self.scaler.n_refl, 0.)
        self.scaler.summed_wt_I = flex.double(self.scaler.n_refl, 0.)

        Intensity = refls['scaled_intensity']
        sigma = Intensity / refls['isigi']
        variance = sigma * sigma
        # Look the column up once rather than on every iteration.
        miller_ids = refls['miller_id']

        for i in range(len(refls)):
            j = miller_ids[i]
            self.scaler.summed_wt_I[j] += Intensity[i] / variance[i]
            self.scaler.summed_weight[j] += 1 / variance[i]
def tanh_fit(x, y, iqr_multiplier=None):
    """Fit a tanh curve to y(x) and return the fitted curve evaluated at x.

    When ``iqr_multiplier`` is not None (it must then be > 0), points whose
    residual from the first fit lies more than ``iqr_multiplier`` times the
    interquartile range outside the residual quartiles are dropped. If any
    were dropped, the curve is refitted to the remaining points. The returned
    values are always evaluated on the full ``x``.
    """
    from scitbx.math import curve_fitting

    fit = curve_fitting.tanh_fit(x, y)
    model = curve_fitting.tanh(*fit.params)
    if iqr_multiplier is None:
        return model(x)

    assert iqr_multiplier > 0
    residuals = y - model(x)

    from scitbx.math import five_number_summary

    _, lower_q, _, upper_q, _ = five_number_summary(residuals)
    margin = iqr_multiplier * (upper_q - lower_q)
    rejected = (residuals > upper_q + margin) | (residuals < lower_q - margin)
    if rejected.count(True) > 0:
        refit = curve_fitting.tanh_fit(x.select(~rejected), y.select(~rejected))
        model = curve_fitting.tanh(*refit.params)
    return model(x)
def tanh_fit(x, y, iqr_multiplier=None):
    """
    Fit a tanh function to the values y(x) and return the fitted values at x.

    x and y should be equally sized arrays of floats. This is used for
    fitting a curve to CC½.

    If ``iqr_multiplier`` is truthy it must be positive. Points whose residual
    from the initial fit lies more than ``iqr_multiplier`` interquartile
    ranges outside the residual quartiles are then excluded and, if any were
    excluded, the fit is repeated without them. A falsy value (None or 0)
    disables outlier rejection.
    """
    fit = curve_fitting.tanh_fit(x, y)
    model = curve_fitting.tanh(*fit.params)
    if not iqr_multiplier:
        return model(x)

    assert iqr_multiplier > 0
    residuals = y - model(x)
    _, lower_q, _, upper_q, _ = five_number_summary(residuals)
    margin = iqr_multiplier * (upper_q - lower_q)
    rejected = (residuals > upper_q + margin) | (residuals < lower_q - margin)
    if rejected.count(True) > 0:
        refit = curve_fitting.tanh_fit(x.select(~rejected), y.select(~rejected))
        model = curve_fitting.tanh(*refit.params)
    return model(x)
def unit_cell_histogram(self, plot_name=None):
    """Collect unit cell parameters, flag IQR outliers and plot histograms.

    The six unit cell parameters of every experiment in
    ``self._data_manager.experiments`` are gathered. An experiment is flagged
    as an outlier if any parameter lies more than 1.5 interquartile ranges
    outside that parameter's quartiles. Parameters with an interquartile
    range below 1e-6 are not used for flagging.

    ``plot_name`` is accepted but not used in this body.
    """
    uc_params = [flex.double() for _ in range(6)]
    for expt in self._data_manager.experiments:
        cell_values = expt.crystal.get_unit_cell().parameters()
        for column, value in zip(uc_params, cell_values):
            column.append(value)

    from scitbx.math import five_number_summary

    iqr_ratio = 1.5
    outliers = flex.bool(uc_params[0].size(), False)
    for column in uc_params:
        lowest, q1, median, q3, highest = five_number_summary(column)
        logger.info(
            "Five number summary: min %.2f, q1 %.2f, med %.2f, q3 %.2f, max %.2f"
            % (lowest, q1, median, q3, highest)
        )
        spread = q3 - q1
        if spread < 1e-6:
            # Essentially constant parameter: nothing sensible to reject.
            continue
        fence = iqr_ratio * spread
        outliers.set_selected(column > q3 + fence, True)
        outliers.set_selected(column < q1 - fence, True)

    logger.info("Identified %i unit cell outliers" % outliers.count(True))
    plot_uc_histograms(uc_params, outliers)
def reject_outliers(self, data):
    """Return a boolean mask flagging the outliers in ``data``.

    A value is flagged when it lies more than 1.5 interquartile ranges above
    the third quartile or below the first quartile.
    """
    from scitbx.math import five_number_summary

    _, lower_q, _, upper_q, _ = five_number_summary(data)
    fence = 1.5 * (upper_q - lower_q)

    flagged = flex.bool(len(data), False)
    flagged.set_selected(data > upper_q + fence, True)
    flagged.set_selected(data < lower_q - fence, True)
    return flagged
def _detect_outliers(self, cols):
    """Return a boolean mask of rows that are outliers in any column.

    For each column, values more than ``self._iqr_multiplier`` interquartile
    ranges outside that column's quartiles are flagged. The mask has the
    length of the first column.
    """
    from scitbx.math import five_number_summary

    flagged = flex.bool(len(cols[0]), False)
    for column in cols:
        _, lower_q, _, upper_q, _ = five_number_summary(column)
        fence = self._iqr_multiplier * (upper_q - lower_q)
        flagged.set_selected(column > upper_q + fence, True)
        flagged.set_selected(column < lower_q - fence, True)
    return flagged
def width(self):
    """Return the sample standard deviation of the inlying ``self.dvals``.

    Inliers are the values strictly within 1.5 interquartile ranges of the
    quartiles. The sentinel 999 is returned when there are fewer than three
    values, or when fewer than 80% of ``self.target_refl_count`` values
    remain after outlier rejection. The result is also printed.
    """
    dvals = self.dvals
    if len(dvals) < 3:
        return 999

    _, lower_q, _, upper_q, _ = five_number_summary(dvals)
    spread = upper_q - lower_q
    below_upper = dvals < upper_q + 1.5*spread
    above_lower = dvals > lower_q - 1.5*spread
    inliers = below_upper & above_lower
    n_inliers = inliers.count(True)

    if n_inliers < 0.8*self.target_refl_count:
        return 999

    result = dvals.select(inliers).sample_standard_deviation()
    print(f'width {result:.5f} from {n_inliers} dvals')
    return result
def reject_outliers(self, data, iqr_ratio=1.5):
    """Return a boolean mask flagging the outliers in ``data``.

    A value is flagged when it lies more than ``iqr_ratio`` interquartile
    ranges (plus a tolerance of 1e-6) above the third quartile or below the
    first quartile. With ``iqr_ratio=None`` nothing is flagged.
    """
    flagged = flex.bool(len(data), False)
    if iqr_ratio is None:
        return flagged

    from scitbx.math import five_number_summary

    tolerance = 1e-6
    _, lower_q, _, upper_q, _ = five_number_summary(data)
    fence = iqr_ratio * (upper_q - lower_q)
    flagged.set_selected(data > upper_q + fence + tolerance, True)
    flagged.set_selected(data < lower_q - fence - tolerance, True)
    return flagged
def outlier_selection(uc_params, iqr_ratio=1.5):
    """Return a boolean mask of entries that are outliers in any parameter.

    ``uc_params`` is a sequence of equally sized arrays. An entry is flagged
    when, for any array, its value lies more than ``iqr_ratio`` interquartile
    ranges outside that array's quartiles. Arrays with an interquartile range
    below 1e-6 are not used for flagging. The five-number summary of every
    array and the final outlier count are logged.
    """
    flagged = flex.bool(uc_params[0].size(), False)
    for column in uc_params:
        lowest, q1, median, q3, highest = five_number_summary(column)
        logger.info(
            "Five number summary: min %.2f, q1 %.2f, med %.2f, q3 %.2f, max %.2f"
            % (lowest, q1, median, q3, highest))
        spread = q3 - q1
        if spread < 1e-6:
            # Essentially constant parameter: nothing sensible to reject.
            continue
        fence = iqr_ratio * spread
        flagged.set_selected(column > q3 + fence, True)
        flagged.set_selected(column < q1 - fence, True)
    logger.info("Identified %i unit cell outliers" % flagged.count(True))
    return flagged
def run(params):
    """Summarise per-device image processing times from per-rank log files.

    For each rank in ``range(params.ranks)`` the file ``rank_<rank>.log`` in
    ``params.input_path`` is read. The elapsed time on every line starting
    with ``idx------finis-------->`` is collected under the (node, device)
    address derived from the rank, ``params.ranks_per_device`` and
    ``params.devices_per_node``. A median and five-number summary are then
    printed for each device.
    """
    node_names = get_log()
    counter = 0
    root = params.input_path
    all_rank = flex.int()
    device_elapsed = dict()

    for rank in range(params.ranks):
        filename = "rank_%d.log" % (rank)
        node_num = rank // params.ranks_per_device // params.devices_per_node
        device_num = (rank // params.ranks_per_device) % params.devices_per_node
        device_addr = (node_num, device_num)
        device_times = device_elapsed.setdefault(device_addr, [])
        counter += 1
        if counter % 100 == 1:
            print(filename, counter)

        # Use a context manager so each log file is closed promptly.
        with open(os.path.join(root, filename)) as log_file:
            for line in log_file:
                if not line.startswith('idx------finis-------->'):
                    continue
                try:
                    _, _, _, _, ts, _, elapsed = line.strip().split()
                    float(ts)  # lines with an unparsable timestamp are skipped
                except ValueError:
                    continue
                elapsed = float(elapsed)
                device_times.append(elapsed)
                all_rank.append(rank)

        # NOTE(review): the source lost its indentation; this is assumed to
        # print once per rank rather than once per log line - confirm.
        print("Rank", rank, "node", node_num, node_names[node_num],
              "device", device_num)

    print("There are %d images" % (len(all_rank)))
    print("There are %d device addresses" % (len(device_elapsed)))

    for (node_num, device_num), device_times in device_elapsed.items():
        if not device_times:
            # No median exists for an empty list; report and move on rather
            # than failing with an IndexError.
            print("No elapsed times recorded for", "node", node_num,
                  node_names[node_num], "device", device_num)
            continue
        sorted_elapsed = sorted(device_times)
        print("Median elapsed", "node", node_num, node_names[node_num],
              "device", device_num,
              "is %.4f" % (sorted_elapsed[len(sorted_elapsed) // 2]),
              "5# summary of %d times:" % (len(device_times)),
              ["%.4f" % a for a in five_number_summary(device_times)])
def _find_peaks(self, grid_real, d_min):
    """Locate peaks in ``grid_real`` and return their sites and volumes.

    A copy of the grid is thresholded at ``self._params.rmsd_cutoff`` times
    its rmsd about the mean, reduced to a 0/1 mask and passed to a flood
    fill. Voids larger than ``self._params.peak_volume_cutoff`` times the
    largest non-outlying void are kept.

    Sets ``self._fft_cell`` as a side effect.

    Returns:
        (sites, volumes): centres of mass and grid-point counts of the
        selected voids, both taken from the flood fill.

    Raises:
        indexing.DialsIndexError: if fewer than four voids are found.
    """
    from cctbx import masks
    from scitbx.math import five_number_summary

    binary_grid = grid_real.deep_copy()
    flat_values = binary_grid.as_1d()
    rmsd = math.sqrt(
        flex.mean(flex.pow2(flat_values - flex.mean(flat_values))))
    binary_grid.set_selected(
        binary_grid < (self._params.rmsd_cutoff) * rmsd, 0)
    binary_grid.as_1d().set_selected(binary_grid.as_1d() > 0, 1)
    binary_grid = binary_grid.iround()

    # real space FFT grid dimensions
    edge = self._n_points * d_min / 2
    self._fft_cell = uctbx.unit_cell([edge] * 3 + [90] * 3)

    flood_fill = masks.flood_fill(binary_grid, self._fft_cell)
    n_voids = flood_fill.n_voids()
    if n_voids < 4:
        # Require at least peak at origin and one peak for each basis vector
        raise indexing.DialsIndexError(
            "Indexing failed: fft3d peak search failed to find sufficient number of peaks."
        )

    # The peak at the origin might have a significantly larger volume than
    # the rest, so anomalously large peaks are ignored when determining the
    # minimum volume.
    void_sizes = flood_fill.grid_points_per_void()
    _, lower_q, _, upper_q, _ = five_number_summary(void_sizes)
    iqr_multiplier = 5
    fence = iqr_multiplier * (upper_q - lower_q)
    oversized = flex.bool(n_voids, False)
    oversized.set_selected(void_sizes.as_double() > (upper_q + fence), True)

    largest_typical = flex.max(void_sizes.select(~oversized))
    min_volume = int(self._params.peak_volume_cutoff * largest_typical)
    isel = (void_sizes > min_volume).iselection()

    sites = flood_fill.centres_of_mass_frac().select(isel)
    volumes = flood_fill.grid_points_per_void().select(isel)
    return sites, volumes
def print_stats_on_matches(self):
    """Log five-number summaries of the X, Y and DeltaPsi matches.

    Logs a warning and returns if there are no matches. Returns silently if
    any of the required columns is missing from the matches.
    """
    matches = self.get_matches()
    n_matched = len(matches)
    if n_matched == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations"
        )
        return

    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary

    try:
        x_resid = matches["x_resid"]
        y_resid = matches["y_resid"]
        delpsi = matches["delpsical.rad"]
        w_x, w_y, _ = matches["xyzobs.mm.weights"].parts()
        w_delpsi = matches["delpsical.weights"]
    except KeyError:
        return

    # (row label, data column, factor applied before formatting or None)
    columns = (
        ("Xc - Xo (mm)", x_resid, None),
        ("Yc - Yo (mm)", y_resid, None),
        ("DeltaPsi (deg)", delpsi, RAD2DEG),
        ("X weights", w_x, None),
        ("Y weights", w_y, None),
        ("DeltaPsi weights", w_delpsi, DEG2RAD ** 2),
    )
    table_rows = []
    for label, values, factor in columns:
        quantiles = five_number_summary(values)
        if factor is not None:
            quantiles = [q * factor for q in quantiles]
        table_rows.append([label] + ["%.4g" % q for q in quantiles])

    title = (
        "\nSummary statistics for {} observations".format(n_matched)
        + " matched to predictions:"
    )
    logger.info(title)
    table = simple_table(table_rows, ["", "Min", "Q1", "Med", "Q3", "Max"])
    logger.info(table.format())
    logger.info("")
def show_image_statistics(experiments, im_type):
    """Print a five-number summary of the pixel values of every image.

    Args:
        experiments: experiment list; it is rebuilt from its JSON form with
            ``check_format=True`` so that image data can be read.
        im_type: ``"raw"`` or ``"corrected"``, selecting which image data to
            summarise.

    Raises:
        ValueError: if ``im_type`` is neither ``"raw"`` nor ``"corrected"``.
        Sorry: if rebuilding the experiment list raises an OSError.
    """
    if im_type not in ("raw", "corrected"):
        raise ValueError(f"Unknown im_type: {im_type}")
    use_raw = im_type == "raw"

    # To show image statistics, check_format has to be true, so the
    # experiment list is reinstantiated here.
    try:
        experiments = ExperimentListFactory.from_json(
            experiments.as_json(), check_format=True)
    except OSError as e:
        raise Sorry(
            f"Unable to read image data. Please check {e.filename} is accessible"
        )

    print(f"Five number summary of the {im_type} images")
    for expt in experiments:
        for i in range(len(expt.imageset)):
            identifier = os.path.basename(
                expt.imageset.get_image_identifier(i))
            if use_raw:
                panels = expt.imageset.get_raw_data(i)
            else:
                panels = expt.imageset.get_corrected_data(i)
            if not isinstance(panels, tuple):
                panels = (panels, )

            # Concatenate all panels into one flat array before summarising.
            flat_data = panels[0].as_1d()
            for panel in panels[1:]:
                flat_data.extend(panel.as_1d())

            summary = five_number_summary(flat_data)
            print(
                "{}: Min: {:.1f} Q1: {:.1f} Med: {:.1f} Q3: {:.1f} Max: {:.1f}"
                .format(identifier, *summary))
def print_stats_on_matches(self):
    """Log five-number summaries of the X, Y and Phi matches.

    Logs a warning and returns if there are no matches. Returns silently if
    any of the required columns is missing from the matches.
    """
    matches = self.get_matches()
    n_matched = len(matches)
    if n_matched == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations"
        )
        return

    try:
        x_resid = matches["x_resid"]
        y_resid = matches["y_resid"]
        phi_resid = matches["phi_resid"]
        w_x, w_y, w_phi = matches["xyzobs.mm.weights"].parts()
    except KeyError:
        return

    title = (
        "\nSummary statistics for {} observations".format(n_matched)
        + " matched to predictions:"
    )

    # (row label, data column, factor applied before formatting or None)
    columns = (
        ("Xc - Xo (mm)", x_resid, None),
        ("Yc - Yo (mm)", y_resid, None),
        ("Phic - Phio (deg)", phi_resid, RAD2DEG),
        ("X weights", w_x, None),
        ("Y weights", w_y, None),
        ("Phi weights", w_phi, DEG2RAD ** 2),
    )
    table_rows = []
    for label, values, factor in columns:
        quantiles = five_number_summary(values)
        if factor is not None:
            quantiles = [q * factor for q in quantiles]
        table_rows.append([label] + ["%.4g" % q for q in quantiles])

    table = simple_table(table_rows, ["", "Min", "Q1", "Med", "Q3", "Max"])
    logger.info(title)
    logger.info(table.format())
    logger.info("")
def print_stats_on_matches(self):
    """Log a table of five-number summaries for the X, Y and Phi matches.

    Logs a warning and returns if there are no matches. Returns silently if
    any of the required columns is missing from the matches.
    """
    matches = self.get_matches()
    n_matched = len(matches)
    if n_matched == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations")
        return

    try:
        x_resid = matches["x_resid"]
        y_resid = matches["y_resid"]
        phi_resid = matches["phi_resid"]
        w_x, w_y, w_phi = matches["xyzobs.mm.weights"].parts()
    except KeyError:
        return

    title = (f"\nSummary statistics for {n_matched} observations" +
             " matched to predictions:")

    # (row label, data column, factor applied before formatting or None)
    columns = (
        ("Xc - Xo (mm)", x_resid, None),
        ("Yc - Yo (mm)", y_resid, None),
        ("Phic - Phio (deg)", phi_resid, RAD2DEG),
        ("X weights", w_x, None),
        ("Y weights", w_y, None),
        ("Phi weights", w_phi, DEG2RAD ** 2),
    )
    table_rows = []
    for label, values, factor in columns:
        quantiles = five_number_summary(values)
        if factor is not None:
            quantiles = [q * factor for q in quantiles]
        table_rows.append([label] + [f"{q:.4g}" for q in quantiles])

    logger.info(title)
    logger.info(
        dials.util.tabulate(
            table_rows, ["", "Min", "Q1", "Med", "Q3", "Max"],
            numalign="right") + "\n")
def run(params):
    """Plot and summarise per-rank timings parsed from ``*.log`` files.

    Every ``.log`` file in ``params.input_path`` whose name contains ``rank``
    is scanned for channel, logger and per-image ``finis`` timestamps. These
    are plotted against MPI rank relative to the earliest image start time
    (``datum``). Summary timings for the image loop, the MPI communicator,
    Python and (when available) the launching script are printed. The figure
    is optionally pickled and/or shown, depending on ``params``.
    """
    script_start, script_finis = get_log(params)
    counter = 0
    datum = None
    root = params.input_path
    fig_object = plt.figure()
    good_timepoints = flex.double()
    good_elapsed = flex.double()
    good_channels = flex.double()
    good_logger = flex.double()
    all_rank = flex.int()
    channels_rank = flex.int()
    logger_rank = flex.int()

    for filename in os.listdir(root):
        if os.path.splitext(filename)[1] != '.log':
            continue
        if 'rank' not in filename:
            continue
        rank = int(filename.split('_')[1].split('.')[0])
        counter += 1
        if counter % 100 == 1:
            print(filename, counter)
        # Use a context manager so each log file is closed promptly.
        with open(os.path.join(root, filename)) as log_file:
            for line in log_file:
                if line.startswith('datetime for channels'):
                    goodtime = float(line.split()[6])
                    good_channels.append(goodtime)
                    channels_rank.append(rank)
                if "finished with the rank logger" in line:
                    goodtime = float(line.split()[1])
                    good_logger.append(goodtime)
                    logger_rank.append(rank)
                if not line.startswith('idx------finis-------->'):
                    continue
                try:
                    _, _, _, _, ts, _, elapsed = line.strip().split()
                    epoch_finis = float(ts)
                except ValueError:
                    continue
                elapsed = float(elapsed)
                epoch_start = epoch_finis - elapsed
                if datum is None:
                    datum = epoch_start
                datum = min(datum, epoch_start)
                good_timepoints.append(epoch_finis)
                good_elapsed.append(elapsed)
                all_rank.append(rank)

    # The channel calculation is best effort. Remember whether it succeeded
    # so the breakdown further down does not hit undefined names.
    chanx = sbx = None
    try:
        chanx, chany, sbx, sby = get_channcalc(params)
        plt.plot(chanx - datum, chany, 'c.', markersize="0.8")
        plt.plot(sbx - datum, sby, 'b.', markersize="0.8")
    except Exception:
        pass

    plt.plot(good_channels - datum, channels_rank, 'r.', markersize="1")
    plt.plot(good_timepoints - datum, all_rank, 'g.', markersize="1")
    plt.plot(good_logger - datum, logger_rank, 'k.', markersize="0.8")
    good_total = len(good_timepoints)
    max_rank = max(all_rank)
    sorted_elapsed = sorted(good_elapsed)
    print("the median weather time is %.5f" % (sorted_elapsed[len(sorted_elapsed) // 2]),
          "for job", os.path.basename(os.path.abspath(".")))
    print("Five number summary of %d good image processing times:" % good_total,
          ["%.5f" % a for a in five_number_summary(good_elapsed)])
    plt.plot([0., max(good_timepoints) - datum],
             [-(1. / 30.) * max_rank, -(1. / 30.) * max_rank],
             'r-', label="foreach image")
    print("The total envelope time is %.1f seconds" % (max(good_timepoints) - datum))
    plt.xlabel('Wall time (sec)')
    plt.ylabel('MPI Rank Number')

    mpi_finish, mpi_elapse = get_MPI_time()
    mpi_start = mpi_finish - mpi_elapse
    plt.plot([mpi_finish - mpi_elapse - datum, mpi_finish - datum],
             [-(2. / 30.) * max_rank, -(2. / 30.) * max_rank],
             color="orange", label="MPI comm")
    print("The total MPI communicator time is %.1f seconds, with %.1f sec before 'foreach' and %.1f sec trailing" % (
        mpi_elapse, datum - mpi_start, mpi_finish - max(good_timepoints)))

    py_finish, py_elapse = get_py_time()
    py_start = py_finish - py_elapse
    plt.plot([py_finish - py_elapse - datum, py_finish - datum],
             [-(3. / 30.) * max_rank, -(3. / 30.) * max_rank],
             color="blue", label="Python time")
    print("The total Python time is %.1f seconds, with %.1f sec for imports and %.1f sec trailing" % (
        py_elapse, mpi_start - py_start, py_finish - mpi_finish))

    if script_start is not None:
        plt.plot([script_start - datum, script_finis - datum],
                 [-(4. / 30.) * max_rank, -(4. / 30.) * max_rank],
                 color="magenta", label="jsrun time")
        print("The total script time is %.1f seconds, with %.1f sec for ahead and %.1f sec trailing" % (
            script_finis - script_start,
            py_start - script_start,
            script_finis - py_finish))
        print("""A: startup jsrun %6.2f""" % (py_start - script_start))

    # NOTE(review): the source lost its line structure. This breakdown is
    # assumed to sit outside the ``script_start`` branch, and its
    # triple-quoted format string is reproduced exactly as it appears (on one
    # line) - confirm both against the original file.
    if chanx is not None:
        print(("B: Python imports %6.2f C: MPI gather SF %6.2f "
               "D: MPI broadcast %6.2f E: logger redirect%6.2f, mean %6.2f "
               "F: set CUDA device%6.2f G: big data to GPU%6.2f, mean %6.2f ") % (
            mpi_start - py_start,
            flex.max(chanx) - mpi_start,
            flex.max(sbx) - flex.max(chanx),
            flex.max(good_logger) - flex.max(sbx),
            flex.mean(good_logger - flex.max(sbx)),
            datum - flex.max(good_logger),
            flex.max(good_channels - datum),
            flex.mean(good_channels - datum)))
    else:
        print("Skipping the B-G timing breakdown: channel calculation timings are unavailable")

    plt.title(params.plot_title + " " + os.path.basename(os.path.abspath(".")))
    if params.pickle_plot:
        from libtbx.easy_pickle import dump
        dump('%s' % params.pickle_filename, fig_object)
    if params.show_plot:
        plt.legend(loc="upper right")
        plt.show()
def run_with_preparsed(experiments, reflections, params):
    """Filter out experiments whose positional RMSD is an upper outlier.

    For every experiment the RMSD of ``xyzcal.mm - xyzobs.mm.value`` over its
    reflections is computed and multiplied by 1000. Experiments with no
    reflections are removed, as are those whose RMSD exceeds the third
    quartile by more than ``params.iqr_multiplier`` interquartile ranges.
    Reflection ``id`` columns are renumbered to match the filtered list.

    Optional behaviour controlled by ``params``:
        detector: only experiments using this detector index are filtered;
            skipped experiments whose crystal survives filtering are added
            back afterwards.
        max_delta: discard reflections whose difference vector norm exceeds
            this value before computing RMSDs.
        show_plots: show a histogram and boxplot of the RMSDs.
        delta_psi_filter: discard filtered reflections whose
            ``delpsical.rad`` value, converted to degrees, falls outside
            +/- this value.

    Returns:
        (filtered_experiments, filtered_reflections)
    """
    from dxtbx.model import ExperimentList
    from scitbx.math import five_number_summary

    print("Found", len(reflections), "reflections", "and", len(experiments),
          "experiments")

    filtered_reflections = flex.reflection_table()
    filtered_experiments = ExperimentList()

    skipped_reflections = flex.reflection_table()
    skipped_experiments = ExperimentList()

    if params.detector is not None:
        culled_reflections = flex.reflection_table()
        culled_experiments = ExperimentList()
        detector = experiments.detectors()[params.detector]
        for expt_id, experiment in enumerate(experiments):
            refls = reflections.select(reflections['id'] == expt_id)
            if experiment.detector is detector:
                culled_experiments.append(experiment)
                refls['id'] = flex.int(len(refls), len(culled_experiments) - 1)
                culled_reflections.extend(refls)
            else:
                skipped_experiments.append(experiment)
                refls['id'] = flex.int(len(refls), len(skipped_experiments) - 1)
                skipped_reflections.extend(refls)

        print("RMSD filtering %d experiments using detector %d, out of %d" %
              (len(culled_experiments), params.detector, len(experiments)))
        reflections = culled_reflections
        experiments = culled_experiments

    difference_vector_norms = (reflections['xyzcal.mm'] -
                               reflections['xyzobs.mm.value']).norms()

    if params.max_delta is not None:
        sel = difference_vector_norms <= params.max_delta
        reflections = reflections.select(sel)
        difference_vector_norms = difference_vector_norms.select(sel)

    data = flex.double()
    counts = flex.double()
    for i in range(len(experiments)):
        dvns = difference_vector_norms.select(reflections['id'] == i)
        counts.append(len(dvns))
        if len(dvns) == 0:
            data.append(0)
            continue
        rmsd = math.sqrt(flex.sum_sq(dvns) / len(dvns))
        data.append(rmsd)
    data *= 1000
    subset = data.select(counts > 0)
    print(len(subset), "experiments with > 0 reflections")

    if params.show_plots:
        h = flex.histogram(subset, n_slots=40)
        fig = plt.figure()
        # The subplot position must be an integer; current matplotlib
        # rejects the string form '111'.
        ax = fig.add_subplot(111)
        ax.plot(h.slot_centers().as_numpy_array(),
                h.slots().as_numpy_array(), '-')
        plt.title("Histogram of %d image RMSDs" % len(subset))

        plt.figure()
        plt.boxplot(subset, vert=False)
        plt.title("Boxplot of %d image RMSDs" % len(subset))
        plt.show()

    outliers = counts == 0
    min_x, q1_x, med_x, q3_x, max_x = five_number_summary(subset)
    print(
        "Five number summary of RMSDs (microns): min %.1f, q1 %.1f, med %.1f, q3 %.1f, max %.1f"
        % (min_x, q1_x, med_x, q3_x, max_x))
    iqr_x = q3_x - q1_x
    cut_x = params.iqr_multiplier * iqr_x
    # Only the upper tail is rejected: images that are outliers in the
    # 'good' direction (unusually small RMSD) are kept.
    outliers.set_selected(data > q3_x + cut_x, True)

    for i in range(len(experiments)):
        if outliers[i]:
            continue
        refls = reflections.select(reflections['id'] == i)
        refls['id'] = flex.int(len(refls), len(filtered_experiments))
        filtered_reflections.extend(refls)
        filtered_experiments.append(experiments[i])

    n_zero = (counts == 0).count(True)
    print(
        "Removed %d bad experiments and %d experiments with zero reflections, out of %d (%%%.1f)"
        % (len(experiments) - len(filtered_experiments) - n_zero, n_zero,
           len(experiments),
           100 * ((len(experiments) - len(filtered_experiments)) /
                  len(experiments))))

    if params.detector is not None:
        # Add back skipped experiments whose crystal survived the filtering.
        crystals = filtered_experiments.crystals()
        for expt_id, experiment in enumerate(skipped_experiments):
            if experiment.crystal in crystals:
                filtered_experiments.append(experiment)
                refls = skipped_reflections.select(
                    skipped_reflections['id'] == expt_id)
                refls['id'] = flex.int(len(refls),
                                       len(filtered_experiments) - 1)
                filtered_reflections.extend(refls)

    if params.delta_psi_filter is not None:
        delta_psi = filtered_reflections['delpsical.rad'] * 180 / math.pi
        sel = (delta_psi <= params.delta_psi_filter) & (
            delta_psi >= -params.delta_psi_filter)
        n_before = len(filtered_reflections)
        filtered_reflections = filtered_reflections.select(sel)
        print("Filtering by delta psi, removing %d out of %d reflections" %
              (n_before - len(filtered_reflections), n_before))

    print("Final experiment count", len(filtered_experiments))
    return filtered_experiments, filtered_reflections
def test_stills_pred_param(tc):
    """Compare analytical and finite-difference gradients for stills.

    For every parameter and each of dX/dp, dY/dp and dDeltaPsi/dp, the
    absolute and IQR-normalised differences between the finite-difference and
    analytical gradients are checked against fixed tolerances, both before
    and after rejecting 1.5*IQR outliers.
    """
    print("Testing derivatives for StillsPredictionParameterisation")
    print("========================================================")

    # Build a prediction parameterisation for the stills experiment
    pred_param = StillsPredictionParameterisation(
        tc.stills_experiments,
        detector_parameterisations=[tc.det_param],
        beam_parameterisations=[tc.s0_param],
        xl_orientation_parameterisations=[tc.xlo_param],
        xl_unit_cell_parameterisations=[tc.xluc_param],
    )

    # Predict the reflections in place. This must happen before the
    # analytical gradients are calculated so quantities like s1 are correct.
    ref_predictor = StillsExperimentsPredictor(tc.stills_experiments)
    ref_predictor(tc.reflections)

    analytical = pred_param.get_gradients(tc.reflections)
    finite_diff = tc.get_fd_gradients(pred_param, ref_predictor)

    for i, (an_grad, fd_grad) in enumerate(zip(analytical, finite_diff)):
        # compare FD with analytical calculations
        print(f"\nParameter {i}: {fd_grad['name']}")

        for name in ["dX_dp", "dY_dp", "dDeltaPsi_dp"]:
            print(name)
            fd_values = fd_grad[name]
            abs_error = fd_values - an_grad[name]

            fns = five_number_summary(abs_error)
            print(" summary of absolute errors: %9.6f %9.6f %9.6f %9.6f %9.6f"
                  % fns)
            # largest absolute error found to be about 0.00025 for dY/dp of
            # Crystal0g_param_3.
            assert flex.max(flex.abs(abs_error)) < 0.0003

            # Reject outlying absolute errors and test again.
            iqr = fns[3] - fns[1]

            # Skip further stats on errors with an IQR of near zero, e.g.
            # dDeltaPsi_dp for detector parameters, which are all equal to
            # zero.
            if iqr < 1.0e-10:
                continue

            inliers = (abs_error < fns[3] + 1.5 * iqr) & (
                abs_error > fns[1] - 1.5 * iqr)
            kept = abs_error.select(inliers)
            tst_val = kept[flex.max_index(flex.abs(kept))]
            n_outliers = inliers.count(False)
            print(" {0} outliers rejected, leaving greatest absolute error: {1:9.6f}"
                  .format(n_outliers, tst_val))
            # largest absolute error now 0.000086 for dX/dp of Beam0Mu2
            assert abs(tst_val) < 0.00009

            # Completely skip parameters with FD gradients all zero (e.g.
            # gradients of DeltaPsi for detector parameters)
            if (flex.abs(fd_values) < 1.0e-10).all_eq(True):
                continue

            # Otherwise calculate normalised errors, by dividing absolute
            # errors by the IQR (more stable than relative error calculation)
            norm_error = abs_error / iqr
            fns = five_number_summary(norm_error)
            print(" summary of normalised errors: %9.6f %9.6f %9.6f %9.6f %9.6f"
                  % fns)
            # largest normalised error found to be about 25.7 for dY/dp of
            # Crystal0g_param_3.
            try:
                assert flex.max(flex.abs(norm_error)) < 30
            except AssertionError as e:
                e.args += (
                    f"extreme normalised error value: {flex.max(flex.abs(norm_error))}",
                )
                raise e

            # Reject outlying normalised errors and test again
            iqr = fns[3] - fns[1]
            if iqr > 0.0:
                inliers = (norm_error < fns[3] + 1.5 * iqr) & (
                    norm_error > fns[1] - 1.5 * iqr)
                kept = norm_error.select(inliers)
                tst_val = kept[flex.max_index(flex.abs(kept))]
                n_outliers = inliers.count(False)

                # most outliers found for dY/dp of Crystal0g_param_3 (which
                # had largest errors, so no surprise there).
                try:
                    assert n_outliers < 250
                except AssertionError as e:
                    e.args += (f"too many outliers rejected: {n_outliers}", )
                    raise e

                print(" {0} outliers rejected, leaving greatest normalised error: {1:9.6f}"
                      .format(n_outliers, tst_val))
                # largest normalised error now about -4. for dX/dp of
                # Detector0Tau1
                assert abs(tst_val) < 6, f"should be < 6, not {tst_val}"
def __init__(self, intensities, sym_op, cc_true, cc_sig_fac):
    """Initialise a ScoreSymmetryElement object.

    The intensities are reindexed by the symmetry operation (and by its
    inverse when the rotation order is greater than 2), paired with the
    original intensities, filtered to reflections with epsilon == 1, stripped
    of extreme outliers and accumulated into a correlation coefficient.
    From that coefficient the attributes ``n_refs``, ``sigma_cc``, ``z_cc``,
    ``p_cc_given_s``, ``p_cc_given_not_s`` and ``likelihood`` are set.  When
    no reflection pairs are available only ``n_refs``, ``likelihood`` and
    ``z_cc`` are set (the latter two to 0).

    Args:
      intensities (cctbx.miller.array): The intensities on which to perform
        symmetry analysis.
      sym_op (cctbx.sgtbx.rt_mx): The symmetry operation for analysis.
      cc_true (float): the expected value of CC if the symmetry element is
        present, E(CC; S)
      cc_sig_fac (float): Estimation of sigma(CC) as a function of sample
        size.
    """
    self.sym_op = sym_op
    assert self.sym_op.r().info().sense() >= 0
    self.cc = CorrelationCoefficientAccumulator()
    cb_op = sgtbx.change_of_basis_op(self.sym_op)
    cb_ops = [cb_op]
    if self.sym_op.r().order() > 2:
        # include inverse symmetry operation
        cb_ops.append(cb_op.inverse())
    for cb_op in cb_ops:
        if cb_op.is_identity_op():
            # The identity is replaced by the inversion operator
            cb_op = sgtbx.change_of_basis_op("-x,-y,-z")
        reindexed_intensities = intensities.change_basis(cb_op).map_to_asu()
        x, y = intensities.common_sets(
            reindexed_intensities, assert_is_similar_symmetry=False
        )
        # Keep only reflections whose epsilon for this operation equals 1
        sel = (
            sgtbx.space_group().expand_smx(self.sym_op).epsilon(x.indices()) == 1
        )
        x = x.select(sel)
        y = y.select(sel)

        # Flag a pair as an outlier if either member lies further than
        # iqr_multiplier * IQR outside the quartiles of its own column
        outliers = flex.bool(len(x.data()), False)
        iqr_multiplier = 20  # very generous tolerance
        for col in (x.data(), y.data()):
            if col.size():
                min_x, q1_x, med_x, q3_x, max_x = five_number_summary(col)
                iqr_x = q3_x - q1_x
                cut_x = iqr_multiplier * iqr_x
                outliers.set_selected(col > q3_x + cut_x, True)
                outliers.set_selected(col < q1_x - cut_x, True)
        n_outliers = outliers.count(True)
        if n_outliers:
            # The message has two placeholders (count and plural suffix), so
            # the pair returned by plural_s must be unpacked into two logging
            # arguments; passing it as one argument breaks the formatting.
            logger.debug(
                "Rejecting %s outlier value%s",
                *libtbx.utils.plural_s(n_outliers)
            )
            x = x.select(~outliers)
            y = y.select(~outliers)

        self.cc += CorrelationCoefficientAccumulator(x.data(), y.data())

    self.n_refs = self.cc.n()
    if self.n_refs <= 0:
        # Nothing to correlate: no evidence for the symmetry element
        self.likelihood = 0
        self.z_cc = 0
        return

    # sigma(CC) shrinks with sqrt(n) but is never allowed below 0.1
    self.sigma_cc = max(0.1, cc_sig_fac / self.n_refs**0.5)
    self.z_cc = self.cc.coefficient() / self.sigma_cc
    score_cc = ScoreCorrelationCoefficient(
        self.cc.coefficient(), self.sigma_cc, cc_true
    )
    self.p_cc_given_s = score_cc.p_cc_given_s
    self.p_cc_given_not_s = score_cc.p_cc_given_not_s
    self.likelihood = score_cc.p_s_given_cc
def run(params):
    """Plot per-rank image completion times parsed from ``*rank*.log`` files.

    Every file in ``params.input_path`` whose extension is ``.log`` and whose
    name contains ``rank`` is scanned for lines starting with
    ``idx------finis-------->``.  The fifth whitespace-separated field of such
    a line is read as a timestamp and the seventh as an elapsed time; the
    first one seen across all files fixes the time origin.  Each completion
    is plotted against the rank number taken from the file name, a
    five-number summary of the time between consecutive completions on the
    same rank is printed, and node boundaries are drawn as horizontal lines.

    Args:
      params: object providing ``input_path``, ``num_nodes``,
        ``num_cores_per_node``, ``wall_time``, ``plot_title``,
        ``pickle_plot``, ``pickle_filename`` and ``show_plot``.
    """
    counter = 0
    reference = None
    root = params.input_path
    fig_object = plt.figure()
    good_total = fail_total = 0
    # Gaps between consecutive events on the same rank, pooled over all
    # ranks so the summaries below match the totals they are labelled with.
    fail_deltas = []
    good_deltas = []
    for filename in os.listdir(root):
        if os.path.splitext(filename)[1] != '.log':
            continue
        if 'rank' not in filename:
            continue
        fail_timepoints = []
        good_timepoints = []
        rank = int(filename.split('_')[1].split('.')[0])
        counter += 1
        print (filename, rank)
        # Reset per file so a log without matching lines neither crashes nor
        # inherits the state of the previously read file.
        ok = True
        ts = None
        with open(os.path.join(root, filename)) as log_file:
            for line in log_file:
                if not line.startswith('idx------finis-------->'):
                    continue
                try:
                    _, _, _, _, ts, _, elapsed = line.strip().split()
                    ts = float(ts)
                except ValueError:
                    continue
                if reference is None:
                    reference = ts - float(elapsed)

                # Only "finished" lines are parsed here, so every event
                # counts as done; the fail branch is kept for parity with
                # logs that record a status.
                status = 'done'
                if status in ['stop', 'done', 'fail']:
                    if status == 'done':
                        good_timepoints.append(ts - reference)
                    else:
                        fail_timepoints.append(ts - reference)
                    ok = True
                else:
                    ok = False
        plt.plot(fail_timepoints, [rank] * len(fail_timepoints), 'b.')
        plt.plot(good_timepoints, [rank] * len(good_timepoints), 'g.')
        fail_total += len(fail_timepoints)
        good_total += len(good_timepoints)
        if not ok:
            plt.plot([ts - reference], [rank], 'rx')

        fail_deltas.extend(
            later - earlier
            for earlier, later in zip(fail_timepoints, fail_timepoints[1:]))
        good_deltas.extend(
            later - earlier
            for earlier, later in zip(good_timepoints, good_timepoints[1:]))

    if fail_deltas:
        print("Five number summary of %d fail image processing times:" % fail_total,
              five_number_summary(flex.double(fail_deltas)))
    if good_deltas:
        print("Five number summary of %d good image processing times:" % good_total,
              five_number_summary(flex.double(good_deltas)))

    # One horizontal line per node, drawn just below its first rank
    for i in range(params.num_nodes):
        boundary = i * params.num_cores_per_node - 0.5
        plt.plot([0, params.wall_time], [boundary, boundary], 'r-')
    plt.xlabel('Wall time (sec)')
    plt.ylabel('MPI Rank Number')
    plt.title(params.plot_title)
    if params.pickle_plot:
        from libtbx.easy_pickle import dump
        dump('%s' % params.pickle_filename, fig_object)
    if params.show_plot:
        plt.show()
def print_stats_on_matches(self):
    """Print some basic statistics on the matches.

    Logs a table of five-number summaries (min, Q1, median, Q3, max) of the
    X and Y residuals, DeltaPsi, and their weights for the reflections
    returned by ``self.get_matches()``.  When ``self._verbosity`` is at least
    3 and at least 20 reflections matched, a second table listing the first
    20 reflections returned by ``self._sort_obs_by_residual`` is logged at
    debug level.  A warning is logged, and nothing else is done, when there
    are no matches.
    """
    matches = self.get_matches()
    nref = len(matches)
    if nref == 0:
        # A five-number summary of nothing is undefined; bail out before
        # touching any column.
        logger.warning("Unable to calculate summary statistics for zero observations")
        return

    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary

    def summary_row(label, values, scale=None):
        """Format the five-number summary of values as one table row."""
        numbers = five_number_summary(values)
        if scale is not None:
            numbers = [e * scale for e in numbers]
        return [label] + ["%.4g" % e for e in numbers]

    x_resid = matches['x_resid']
    y_resid = matches['y_resid']
    delpsi = matches['delpsical.rad']
    w_x, w_y, _ = matches['xyzobs.mm.weights'].parts()
    w_delpsi = matches['delpsical.weights']

    msg = "\nSummary statistics for {0} observations".format(nref) +\
          " matched to predictions:"
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = [
        summary_row("Xc - Xo (mm)", x_resid),
        summary_row("Yc - Yo (mm)", y_resid),
        # DeltaPsi is stored in radians but reported in degrees
        summary_row("DeltaPsi (deg)", delpsi, RAD2DEG),
        summary_row("X weights", w_x),
        summary_row("Y weights", w_y),
        # weights scale as 1/angle^2, hence the squared conversion factor
        summary_row("DeltaPsi weights", w_delpsi, DEG2RAD**2),
    ]
    logger.info(msg)
    logger.info(simple_table(rows, header).format())
    logger.info("")

    # sorting is expensive and the following table is only of interest in
    # special cases, so return now if verbosity is not high
    if self._verbosity < 3:
        return

    if nref < 20:
        logger.debug("Fewer than 20 reflections matched!")
        return

    sl = self._sort_obs_by_residual(matches)
    logger.debug("Reflections with the worst 20 positional residuals:")
    header = ['Miller index', 'x_resid', 'y_resid', 'pnl', 'x_obs', 'y_obs',
              'x_obs\nweight', 'y_obs\nweight']
    rows = []
    for i in range(20):
        e = sl[i]
        x_obs, y_obs, _ = e['xyzobs.mm.value']
        rows.append(['% 3d, % 3d, % 3d' % e['miller_index'],
                     '%5.3f' % e['x_resid'],
                     '%5.3f' % e['y_resid'],
                     '%d' % e['panel'],
                     '%5.3f' % x_obs,
                     '%5.3f' % y_obs,
                     '%5.3f' % e['xyzobs.mm.weights'][0],
                     '%5.3f' % e['xyzobs.mm.weights'][1]])
    logger.debug(simple_table(rows, header).format())
    logger.debug("")
def show_experiments(experiments, show_scan_varying=False, show_image_statistics=False):
    """Return a multi-line text description of every experiment.

    For each experiment the detector, its maximum resolutions, the beam and,
    where present, the scan, goniometer, crystal, profile and scaling model
    are described.  When a crystal has scan points, the unit cell averaged
    over those points is added.

    Args:
      experiments: iterable of experiments to describe.
      show_scan_varying (bool): forwarded to ``crystal.show``.
      show_image_statistics (bool): when true, each image of the imageset is
        re-read from disk and a five-number summary of its pixel values is
        appended.

    Returns:
      str: the report, one item per line.

    Raises:
      Sorry: if an image listed in the imageset cannot be loaded.
    """
    report = []
    for index, experiment in enumerate(experiments):
        report.append("Experiment %i:" % index)
        if experiment.identifier != "":
            report.append("Experiment identifier: %s" % experiment.identifier)
        report.append(str(experiment.detector))

        s0 = experiment.beam.get_s0()
        report.extend([
            "Max resolution (at corners): %f"
            % (experiment.detector.get_max_resolution(s0)),
            "Max resolution (inscribed): %f"
            % (experiment.detector.get_max_inscribed_resolution(s0)),
            "",
            show_beam(experiment.detector, experiment.beam),
        ])

        if experiment.scan is not None:
            report.append(str(experiment.scan))
        if experiment.goniometer is not None:
            report.append(show_goniometer(experiment.goniometer))

        from six.moves import cStringIO as StringIO

        crystal = experiment.crystal
        if crystal is not None:
            # crystal.show writes to a stream, so capture it in memory
            captured = StringIO()
            crystal.show(show_scan_varying=show_scan_varying, out=captured)
            report.append(captured.getvalue())
            if crystal.num_scan_points:
                from scitbx.array_family import flex
                from cctbx import uctbx

                # Average lengths and angles separately over all scan points
                lengths = flex.vec3_double()
                angles = flex.vec3_double()
                for point in range(crystal.num_scan_points):
                    cell = crystal.get_unit_cell_at_scan_point(point)
                    a, b, c, alpha, beta, gamma = cell.parameters()
                    lengths.append((a, b, c))
                    angles.append((alpha, beta, gamma))
                mean_a, mean_b, mean_c = lengths.mean()
                mean_alpha, mean_beta, mean_gamma = angles.mean()
                mean_unit_cell = uctbx.unit_cell(
                    (mean_a, mean_b, mean_c, mean_alpha, mean_beta, mean_gamma)
                )
                report.append(" Average unit cell: %s" % mean_unit_cell)

        if experiment.profile is not None:
            report.append(str(experiment.profile))
        if experiment.scaling_model is not None:
            report.append(str(experiment.scaling_model))

        if experiment.imageset is not None and show_image_statistics:
            # XXX This is gross, gross gross!
            # check_format=False, so we can't get the image data from the
            # imageset; each image is loaded again from its path instead
            for image_index in range(len(experiment.imageset)):
                filename = experiment.imageset.get_path(image_index)
                loaded = ExperimentListFactory.from_filenames((filename,))
                if len(loaded) == 0:
                    raise Sorry("Cannot find image {0}".format(filename))
                panels = loaded.imagesets()[0].get_raw_data(0)
                if not isinstance(panels, tuple):
                    panels = (panels,)
                # Pool the pixels of every panel into one flat array
                pixels = panels[0].as_1d()
                for extra_panel in panels[1:]:
                    pixels.extend(extra_panel.as_1d())
                summary = five_number_summary(pixels)
                report.append(
                    "{0}: Min: {1:.1f} Q1: {2:.1f} Med: {3:.1f} Q3: {4:.1f} Max: {5:.1f}".format(
                        os.path.basename(filename), *summary
                    )
                )
    return "\n".join(report)
def run(params):
    """Plot per-rank image processing events parsed from debug ``.txt`` files.

    Every file in ``params.input_path`` whose extension is ``.txt`` and whose
    name contains ``debug`` is read as comma-separated records of
    ``hostname, psanats, ts, status, result``.  The first record of a file
    fixes that file's time origin.  Records whose status is ``stop``,
    ``done`` or ``fail`` are treated as the end of an image: ``done`` events
    are plotted in green and the others in blue against the rank number taken
    from the file name, and the time since the previous end event (or the
    origin) is collected.  If the last record of a file is not an end event,
    a red cross marks it.  Five-number summaries of the collected times are
    printed, and node boundaries are drawn when the node layout and wall time
    are all given.

    Args:
      params: object providing ``input_path``, ``wall_time``, ``num_nodes``,
        ``num_cores_per_node``, ``plot_title``, ``pickle_plot``,
        ``pickle_filename`` and ``show_plot``.
    """
    counter = 0
    root = params.input_path
    fig_object = plt.figure()
    good_total = fail_total = 0
    all_psanats = []
    all_deltas = []
    fail_deltas = []
    good_deltas = []
    end_statuses = ['stop', 'done', 'fail']
    for filename in os.listdir(root):
        if os.path.splitext(filename)[1] != '.txt':
            continue
        if 'debug' not in filename:
            continue
        reference = None
        fail_timepoints = []
        good_timepoints = []
        rank = int(filename.split('_')[1].split('.')[0])
        counter += 1
        print(filename)
        run_timepoints = []
        # Reset per file so a file without parsable records neither crashes
        # nor inherits the state of the previously read file.
        ok = True
        ts = None
        with open(os.path.join(root, filename)) as debug_file:
            for line in debug_file:
                try:
                    hostname, psanats, ts, status, result = line.strip().split(',')
                except ValueError:
                    continue
                if reference is None:
                    sec, ms = reverse_timestamp(ts)
                    reference = sec + ms * 1e-3
                    run_timepoints.append(0)
                    # the first record must start an image, not finish one
                    assert status not in end_statuses

                if status in end_statuses:
                    sec, ms = reverse_timestamp(ts)
                    elapsed = (sec + ms * 1.e-3) - reference
                    run_timepoints.append(elapsed)
                    # run_timepoints[-2] is the previous end event (or 0)
                    if status == 'done':
                        good_timepoints.append(elapsed)
                        good_deltas.append(good_timepoints[-1] - run_timepoints[-2])
                    else:
                        fail_timepoints.append(elapsed)
                        fail_deltas.append(fail_timepoints[-1] - run_timepoints[-2])
                    all_psanats.append(psanats)
                    all_deltas.append(run_timepoints[-1] - run_timepoints[-2])
                    ok = True
                else:
                    ok = False
        plt.plot(fail_timepoints, [rank] * len(fail_timepoints), 'b.')
        plt.plot(good_timepoints, [rank] * len(good_timepoints), 'g.')
        fail_total += len(fail_timepoints)
        good_total += len(good_timepoints)
        if not ok:
            # the rank started an image but never reported finishing it
            sec, ms = reverse_timestamp(ts)
            plt.plot([(sec + ms * 1e-3) - reference], [rank], 'rx')

    if fail_deltas:
        print(
            "Five number summary of %d fail image processing times:" % fail_total,
            five_number_summary(flex.double(fail_deltas)))
    if good_deltas:
        print(
            "Five number summary of %d good image processing times:" % good_total,
            five_number_summary(flex.double(good_deltas)))

    # Node boundaries can only be drawn when all three settings are given.
    # (The guard tests num_cores_per_node itself: subtracting 0.5 first made
    # it true for every integer and raised TypeError for None.)
    if params.wall_time and params.num_nodes and params.num_cores_per_node:
        for i in range(params.num_nodes):
            boundary = i * params.num_cores_per_node - 0.5
            plt.plot([0, params.wall_time], [boundary, boundary], 'r-')
    plt.xlabel('Wall time (sec)')
    plt.ylabel('MPI Rank Number')
    plt.title(params.plot_title)
    if params.pickle_plot:
        from libtbx.easy_pickle import dump
        dump('%s' % params.pickle_filename, fig_object)
    if params.show_plot:
        plt.show()
def plotit(reflections, experiments):
    """
    Make the plots for a set of reflections and experiments.

    Reflections with positive summed-intensity variance are split into
    ``n_bins`` two-theta bins holding roughly equal numbers of reflections.
    For each bin with at least 10 reflections of I/sigI >= 5, the quartiles
    of the observed-minus-predicted distance (scaled by 1000) and of I/sigI
    are stored and one summary line is printed.  The binned curves are then
    drawn on the axes held in the module-level ``figures`` mapping.
    """
    detector = experiments.detectors()[0]
    beam = experiments.beams()[0]  # only used to compute resolution of 2theta

    reflections = reflections.select(reflections['intensity.sum.variance'] > 0)

    # Set up deltaXY and two theta bins
    reflections['difference_vector_norms'] = (
        reflections['xyzcal.mm'] - reflections['xyzobs.mm.value']).norms()
    # add two theta to reflection table
    reflections = setup_stats(
        detector, experiments, reflections, two_theta_only=True)

    sorted_two_theta = flex.sorted(reflections['two_theta_obs'])
    bin_low = [
        sorted_two_theta[int((len(sorted_two_theta) / n_bins) * i)]
        for i in range(n_bins)
    ]
    # each bin ends where the next one starts; the last is padded so the
    # largest two theta falls inside it
    bin_high = bin_low[1:n_bins]
    bin_high.append(sorted_two_theta[-1] + arbitrary_padding)

    x_centers = flex.double()
    n_refls = flex.int()
    rmsds = flex.double()
    p25r, p50r, p75r = flex.double(), flex.double(), flex.double()
    p25i, p50i, p75i = flex.double(), flex.double(), flex.double()

    print("# 2theta Res N dXY IsigI")

    # Compute stats for each bin
    for i, (low, high) in enumerate(zip(bin_low, bin_high)):
        two_theta = reflections['two_theta_obs']
        in_bin = reflections.select((two_theta >= low) & (two_theta < high))

        # Only compute deltaXY stats on reflections with I/sigI at least 5
        i_sigi = in_bin['intensity.sum.value'] / flex.sqrt(
            in_bin['intensity.sum.variance'])
        strong = in_bin.select(i_sigi >= 5)
        n = len(strong)
        if n < 10:
            continue

        norms = strong['difference_vector_norms']
        min_r, q1_r, med_r, q3_r, max_r = five_number_summary(1000 * norms)
        bin_rmsd = 1000 * math.sqrt(flex.sum_sq(norms) / n)
        # the I/sigI quartiles use every reflection in the bin, not only the
        # strong ones
        min_i, q1_i, med_i, q3_i, max_i = five_number_summary(i_sigi)

        # x_center
        c = ((high - low) / 2) + low
        # resolution
        d = beam.get_wavelength() / (2 * math.sin(math.pi * c / (2 * 180)))

        print("%d % 5.1f % 5.1f % 8d %.1f %.1f" % (i, c, d, n, med_r, med_i))

        x_centers.append(c)
        n_refls.append(n)
        rmsds.append(bin_rmsd)
        p25r.append(q1_r)
        p50r.append(med_r)
        p75r.append(q3_r)
        p25i.append(q1_i)
        p50i.append(med_i)
        p75i.append(q3_i)

    # After binning, plot the results
    quartiles_by_plot = {
        'isigi': (p25i, p50i, p75i),
        'deltaXY': (p25r, p50r, p75r),
    }
    for plot in figures:
        ax1 = figures[plot]['ax1']
        ax2 = figures[plot]['ax2']
        if plot in quartiles_by_plot:
            lower, median, upper = quartiles_by_plot[plot]
            x = x_centers.as_numpy_array()
            line, = ax1.plot(x, median.as_numpy_array(), '-')
            line.set_label('Median')
            # shade the inter-quartile band in the colour of the median line
            ax1.fill_between(x,
                             lower.as_numpy_array(),
                             upper.as_numpy_array(),
                             interpolate=True,
                             alpha=0.50,
                             color=line.get_color())
            line, = ax2.plot(x,
                             n_refls.as_numpy_array(),
                             '-',
                             color=line.get_color())
            line.set_label('Median')
        ax1.legend()
        ax2.legend()
def run_stills_pred_param(self, verbose=False):
    """Compare analytical and finite-difference gradients for stills.

    A ``StillsPredictionParameterisation`` is built from the experiments and
    parameterisations held on ``self``.  For every parameter, and for each of
    ``dX_dp``, ``dY_dp`` and ``dDeltaPsi_dp``, the analytical gradient is
    compared with the finite-difference gradient: first as an absolute
    error, then as an error normalised by the inter-quartile range (IQR) of
    the absolute errors.  Each comparison is repeated after rejecting values
    outside the 1.5 * IQR fences.

    Args:
      verbose (bool): print the error summaries as they are computed.

    Raises:
      AssertionError: if any error exceeds its tolerance, or too many
        outliers have to be rejected.
    """
    if verbose:
        print('Testing derivatives for StillsPredictionParameterisation')
        print('========================================================')

    # Build a prediction parameterisation for the stills experiment
    pred_param = StillsPredictionParameterisation(
        self.stills_experiments,
        detector_parameterisations=[self.det_param],
        beam_parameterisations=[self.s0_param],
        xl_orientation_parameterisations=[self.xlo_param],
        xl_unit_cell_parameterisations=[self.xluc_param])

    # Predict the reflections in place. Must do this ahead of calculating
    # the analytical gradients so quantities like s1 are correct
    from dials.algorithms.refinement.prediction import ExperimentsPredictor
    ref_predictor = ExperimentsPredictor(self.stills_experiments)
    ref_predictor.update()
    ref_predictor.predict(self.reflections)

    # get analytical gradients
    an_grads = pred_param.get_gradients(self.reflections)

    fd_grads = self.get_fd_gradients(pred_param, ref_predictor)

    for i, (an_grad, fd_grad) in enumerate(zip(an_grads, fd_grads)):

        # compare FD with analytical calculations
        if verbose:
            print("\nParameter {0}: {1}".format(i, fd_grad['name']))

        for name in ["dX_dp", "dY_dp", "dDeltaPsi_dp"]:
            if verbose:
                print(name)
            a = fd_grad[name]
            b = an_grad[name]

            abs_error = a - b

            fns = five_number_summary(abs_error)
            if verbose:
                print((" summary of absolute errors: %9.6f %9.6f %9.6f " +
                       "%9.6f %9.6f") % fns)
            assert flex.max(flex.abs(abs_error)) < 0.0003
            # largest absolute error found to be about 0.00025 for dY/dp of
            # Crystal0g_param_3. Reject outlying absolute errors and test again.
            iqr = fns[3] - fns[1]

            # skip further stats on errors with an iqr of near zero, e.g.
            # dDeltaPsi_dp for detector parameters, which are all equal to zero
            if iqr < 1.e-10:
                continue

            sel1 = abs_error < fns[3] + 1.5 * iqr
            sel2 = abs_error > fns[1] - 1.5 * iqr
            sel = sel1 & sel2
            tst = flex.max_index(flex.abs(abs_error.select(sel)))
            tst_val = abs_error.select(sel)[tst]
            n_outliers = sel.count(False)
            if verbose:
                print((" {0} outliers rejected, leaving greatest " +
                       "absolute error: {1:9.6f}").format(n_outliers, tst_val))
            # largest absolute error now 0.000086 for dX/dp of Beam0Mu2
            assert abs(tst_val) < 0.00009

            # Completely skip parameters with FD gradients all zero (e.g.
            # gradients of DeltaPsi for detector parameters)
            sel1 = flex.abs(a) < 1.e-10
            if sel1.all_eq(True):
                continue

            # otherwise calculate normalised errors, by dividing absolute
            # errors by the IQR (more stable than relative error calculation)
            norm_error = abs_error / iqr
            fns = five_number_summary(norm_error)
            if verbose:
                print((" summary of normalised errors: %9.6f %9.6f %9.6f " +
                       "%9.6f %9.6f") % fns)
            # largest normalised error found to be about 25.7 for dY/dp of
            # Crystal0g_param_3.
            try:
                assert flex.max(flex.abs(norm_error)) < 30
            except AssertionError as e:
                e.args += ("extreme normalised error value: {0}".format(
                    flex.max(flex.abs(norm_error))),)
                raise e

            # Reject outlying normalised errors and test again
            iqr = fns[3] - fns[1]
            if iqr > 0.:
                sel1 = norm_error < fns[3] + 1.5 * iqr
                sel2 = norm_error > fns[1] - 1.5 * iqr
                sel = sel1 & sel2
                tst = flex.max_index(flex.abs(norm_error.select(sel)))
                tst_val = norm_error.select(sel)[tst]
                n_outliers = sel.count(False)

                # most outliers found for dY/dp of Crystal0g_param_3 (which
                # had largest errors, so no surprise there).
                try:
                    assert n_outliers < 250
                except AssertionError as e:
                    e.args += ("too many outliers rejected: {0}".format(n_outliers),)
                    raise e

                if verbose:
                    print((" {0} outliers rejected, leaving greatest " +
                           "normalised error: {1:9.6f}").format(n_outliers, tst_val))
                # largest normalised error now about -4. for dX/dp of
                # Detector0Tau1
                assert abs(tst_val) < 4.5
    if verbose:
        # finish the report with a blank line
        print("")
def adjust_errors(self):
    """
    Propagate errors to the scaled and merged intensity errors based on
    statistical error propagation.

    This uses 1) an estimate of the errors in the post-refined parameters
    from the observed population and 2) partial derivatives of the scaled
    intensity with respect to each of the post-refined parameters.

    Side effects visible in this method:
      * ``self.scaler.crystal_table`` gains the columns ``deff`` and ``eta``;
      * ``self.scaler.ISIGI['isigi']`` is overwritten with the intensity
        divided by the propagated error;
      * ``self.scaler.summed_weight`` and ``self.scaler.summed_wt_I`` are
        reset and refilled with inverse-variance weights;
      * parameter statistics are written to ``self.log``, and a comparison
        of analytical derivatives against finite differences is printed to
        standard output.

    Only valid for the 'rs' postrefinement algorithm (asserted).
    """
    assert self.scaler.params.postrefinement.algorithm == 'rs'

    refls = self.scaler.ISIGI
    ct = self.scaler.crystal_table

    # Note, since the rs algorithm doesn't explicitly refine eta and deff
    # separately, but instead refines RS, assume rs only incorporates
    # information from deff and set eta to zero.
    ct['deff'] = 1/ct['RS']
    ct['eta'] = flex.double(len(ct), 0)

    # Compute errors by examining distributions of parameters
    stats_thetax = flex.mean_and_variance(ct['thetax'])
    stats_thetay = flex.mean_and_variance(ct['thetay'])
    stats_lambda = flex.mean_and_variance(ct['wavelength'])
    stats_deff = flex.mean_and_variance(ct['deff'])
    stats_rs = flex.mean_and_variance(ct['RS'])
    sigma_thetax = stats_thetax.unweighted_sample_standard_deviation()
    sigma_thetay = stats_thetay.unweighted_sample_standard_deviation()
    sigma_lambda = stats_lambda.unweighted_sample_standard_deviation()
    sigma_eta = 0  # eta is fixed at zero above, so it carries no spread
    sigma_deff = stats_deff.unweighted_sample_standard_deviation()
    sigma_rs = stats_rs.unweighted_sample_standard_deviation()

    print("ThetaX %.4f +/- %.4f" % (r2d(stats_thetax.mean()), r2d(sigma_thetax)), file=self.log)
    print("Thetay %.4f +/- %.4f" % (r2d(stats_thetay.mean()), r2d(sigma_thetay)), file=self.log)
    print("Wavelength %.4f +/- %.4f" % (stats_lambda.mean(), sigma_lambda), file=self.log)
    print("DEFF %.4f +/- %.4f" % (stats_deff.mean(), sigma_deff), file=self.log)
    print("RS %.6f +/- %.6f" % (stats_rs.mean(), sigma_rs), file=self.log)

    # notation: dP1_dP2 is derivative of parameter 1 with respect to
    # parameter 2. Here, for example, is the derivative of rx wrt thetax
    drx_dthetax = flex.mat3_double()
    dry_dthetay = flex.mat3_double()
    s0hat = flex.vec3_double(len(refls), (0, 0, -1))

    ex = col((1, 0, 0))
    ey = col((0, 1, 0))

    # Compute derivatives
    sre = symmetrize_reduce_enlarge(self.scaler.params.target_space_group.group())
    c_gstar_params = None   # one value per crystal, per free G* parameter
    gstar_params = None     # the same values repeated once per reflection
    gstar_derivatives = None
    for i in range(len(ct)):
        n_refl = ct['n_refl'][i]

        # Derivatives of rx/y wrt thetax/y come from cctbx; each crystal's
        # matrix is repeated for all of that crystal's reflections
        drx_dthetax.extend(flex.mat3_double(n_refl, ex.axis_and_angle_as_r3_derivative_wrt_angle(ct['thetax'][i])))
        dry_dthetay.extend(flex.mat3_double(n_refl, ey.axis_and_angle_as_r3_derivative_wrt_angle(ct['thetay'][i])))

        # Derivatives of the B matrix wrt to the unit cell parameters also
        # come from cctbx
        sre.set_orientation(orientation=ct['b_matrix'][i])
        p = sre.forward_independent_parameters()
        dB_dp = sre.forward_gradients()
        if gstar_params is None:
            # The number of free parameters is only known once the first
            # crystal has been processed
            assert gstar_derivatives is None and c_gstar_params is None
            c_gstar_params = [flex.double() for j in range(len(p))]
            gstar_params = [flex.double() for j in range(len(p))]
            gstar_derivatives = [flex.mat3_double() for j in range(len(p))]
        assert len(p) == len(dB_dp) == len(gstar_params) == len(gstar_derivatives) == len(c_gstar_params)
        for j in range(len(p)):
            c_gstar_params[j].append(p[j])
            gstar_params[j].extend(flex.double(n_refl, p[j]))
            gstar_derivatives[j].extend(flex.mat3_double(n_refl, tuple(dB_dp[j])))

    # Compute the error in the unit cell terms from the distribution of unit
    # cell parameters provided
    print("Free G* parameters", file=self.log)
    sigma_gstar = []
    for j in range(len(gstar_params)):
        stats = flex.mean_and_variance(c_gstar_params[j])
        print("G* %d %.4f *1e-5 +/- %.4f *1e-5" % (j, stats.mean()*1e5, stats.unweighted_sample_standard_deviation()*1e5), file=self.log)
        sigma_gstar.append(stats.unweighted_sample_standard_deviation())

    # Compute the scalar terms used while computing derivatives
    r = self.compute_intensity_parameters()

    # Begin computing derivatives
    sigma_Iobs = refls['scaled_intensity']/refls['isigi']
    dI_dIobs = 1/r['D']

    def compute_dI_dp(dq_dp):
        """ Derivatives of the scaled intensity I wrt to thetax, thetay and
        the unit cell parameters are computed the same, starting with the
        derivatives of those parameters wrt to q """
        dqlen_dp = r['q'].dot(dq_dp)/r['qlen']
        dd_dp = -(1/(r['qlen']**2)) * dqlen_dp
        drs_dp = -(r['eta']/(2 * r['d']**2)) * dd_dp
        dslen_dp = r['s'].dot(dq_dp)/r['slen']
        drhsq_dp = 2 * (r['slen'] - (1/r['wavelength'])) * dslen_dp
        dPn_dp = 2 * r['rs'] * drs_dp
        dPd_dp = 2 * ((r['rs'] * drs_dp) + drhsq_dp)
        dP_dp = ((r['p_d'] * dPn_dp)-(r['p_n'] * dPd_dp))/(r['p_d']**2)
        dI_dp = -(refls['iobs']/(r['partiality']**2 * r['G'] * r['eepsilon'])) * dP_dp
        return dI_dp

    # Derivatives wrt the unit cell parameters
    dI_dgstar = []
    for j in range(len(gstar_params)):
        dI_dgstar.append(compute_dI_dp(r['ry'] * r['rx'] * r['u'] * gstar_derivatives[j] * r['h']))

    # Derivatives wrt the crystal orientation
    dI_dthetax = compute_dI_dp(r['ry'] * drx_dthetax * r['u'] * r['b'] * r['h'])
    dI_dthetay = compute_dI_dp(dry_dthetay * r['rx'] * r['u'] * r['b'] * r['h'])

    # Derivatives wrt to the wavelength
    dthetah_dlambda = 1/(flex.sqrt(1 - ((r['wavelength']/(2 * r['d']))**2)) * 2 * r['d'])
    den_dlambda = flex.cos(r['thetah']) * dthetah_dlambda
    der_dlambda = ((r['wavelength'] * den_dlambda) - r['sinthetah'])/r['wavelength']**2
    depsilon_dlambda = -16 * r['B'] * r['er'] * der_dlambda
    ds0_dlambda = s0hat*(-1/r['wavelength']**2)
    dslen_dlambda = r['s'].dot(ds0_dlambda)/r['slen']
    drhsq_dlambda = 2*(r['slen']-(1/r['wavelength']))*(dslen_dlambda+(1/r['wavelength']**2))
    dP_dlambda = -2*(r['p_n']/r['p_d']**2) * drhsq_dlambda
    dD_dlambda = (r['G'] * r['eepsilon'] * dP_dlambda) + (r['partiality'] * r['G'] * r['eepsilon'] * depsilon_dlambda)
    dI_dlambda = -(refls['iobs']/r['D']**2) * dD_dlambda

    # Derivatives wrt to the deff
    drs_deff = -1/(r['deff']**2)
    dPn_deff = 2 * r['rs'] * drs_deff
    dPd_deff = 2 * r['rs'] * drs_deff
    dP_deff = ((r['p_d'] * dPn_deff)-(r['p_n'] * dPd_deff))/(r['p_d']**2)
    dI_deff = -(refls['iobs']/(r['partiality']**2 * r['G'] * r['eepsilon'])) * dP_deff

    # Derivatives wrt to eta
    drs_deta = 1/(2*r['d'])
    dPn_deta = 2 * r['rs'] * drs_deta
    dPd_deta = 2 * r['rs'] * drs_deta
    dP_deta = ((r['p_d']*dPn_deta)-(r['p_n']*dPd_deta))/(r['p_d']**2)
    dI_deta = -(refls['iobs']/(r['partiality']**2 * r['G'] * r['eepsilon'])) * dP_deta

    if True:
        # Show comparisons to finite differences (diagnostic output, always
        # enabled; written to standard output rather than self.log)
        n_cryst_params = sre.constraints.n_independent_params()
        print("Showing finite differences and derivatives for each parameter (first few reflections only)")
        for parameter_name, table, derivatives, delta in zip(
                ['iobs', 'thetax', 'thetay', 'wavelength', 'deff', 'eta'] + ['c%d' % cp for cp in range(n_cryst_params)],
                [refls, ct, ct, ct, ct, ct] + [ct]*n_cryst_params,
                [dI_dIobs, dI_dthetax, dI_dthetay, dI_dlambda, dI_deff, dI_deta] + dI_dgstar,
                [1e-7]*6 + [1e-11]*n_cryst_params):
            finite_g = self.finite_difference(parameter_name, table, delta)
            print(parameter_name)
            for refl_id in range(min(10, len(refls))):
                print("%d % 21.1f % 21.1f" % (refl_id, finite_g[refl_id], derivatives[refl_id]))
            stats = flex.mean_and_variance(finite_g-derivatives)
            stats_finite = flex.mean_and_variance(finite_g)
            # avoid dividing by zero when the finite differences average out
            percent = 0 if stats_finite.mean() == 0 else 100*stats.mean()/stats_finite.mean()
            print("Mean difference between finite and analytical: % 24.4f +/- % 24.4f (%8.3f%% of finite d.)" % (
                stats.mean(), stats.unweighted_sample_standard_deviation(), percent))
            print("")

    # Propagate errors: add the squared contribution of every parameter
    refls['isigi'] = refls['scaled_intensity'] / flex.sqrt(
        ((sigma_Iobs**2 * dI_dIobs**2) +
         sum([sigma_gstar[j]**2 * dI_dgstar[j]**2 for j in range(len(sigma_gstar))]) +
         (sigma_thetax**2 * dI_dthetax**2) +
         (sigma_thetay**2 * dI_dthetay**2) +
         (sigma_lambda**2 * dI_dlambda**2) +
         (sigma_deff**2 * dI_deff**2) +
         (sigma_eta**2 * dI_deta**2)))

    # Show results of propagation
    from scitbx.math import five_number_summary
    all_data = [(refls['iobs'], "Iobs"),
                (sigma_Iobs, "Original errors"),
                (1/r['D'], "Total scale factor"),
                (refls['iobs']/r['D'], "Inflated intensities"),
                (refls['scaled_intensity']/refls['isigi'], "Propagated errors"),
                (flex.sqrt(sigma_Iobs**2 * dI_dIobs**2), "Iobs term"),
                (flex.sqrt(sigma_thetax**2 * dI_dthetax**2), "Thetax term"),
                (flex.sqrt(sigma_thetay**2 * dI_dthetay**2), "Thetay term"),
                (flex.sqrt(sigma_lambda**2 * dI_dlambda**2), "Wavelength term"),
                (flex.sqrt(sigma_deff**2 * dI_deff**2), "Deff term"),
                (flex.sqrt(sigma_eta**2 * dI_deta**2), "Eta term")] + \
               [(flex.sqrt(sigma_gstar[j]**2 * dI_dgstar[j]**2), "Gstar term %d" % j) for j in range(len(sigma_gstar))]
    print("%20s % 20s % 20s % 20s" % ("Data name", "Quartile 1", "Median", "Quartile 3"), file=self.log)
    for data, title in all_data:
        fns = five_number_summary(data)
        # the %d conversions report the quartiles as whole numbers
        print("%20s % 20d % 20d % 20d" % (title, fns[1], fns[2], fns[3]), file=self.log)

    # Final terms for cxi.merge: inverse-variance weighted sums per Miller
    # index
    self.scaler.summed_weight = flex.double(self.scaler.n_refl, 0.)
    self.scaler.summed_wt_I = flex.double(self.scaler.n_refl, 0.)

    Intensity = refls['scaled_intensity']
    sigma = Intensity / refls['isigi']
    variance = sigma * sigma

    for i in range(len(refls)):
        j = refls['miller_id'][i]
        self.scaler.summed_wt_I[j] += Intensity[i] / variance[i]
        self.scaler.summed_weight[j] += 1 / variance[i]
from scitbx.math import five_number_summary

message = ''' this script compares predicted (x,y) vs observed (x,y) on the detector '''
print (message)


def apply_filter(hkl_tuple, filter_array=(1, 1, 1)):
    """Return the first three entries of hkl_tuple scaled element-wise.

    Args:
      hkl_tuple: sequence with at least three numeric entries.
      filter_array: sequence of three multipliers; the default leaves the
        values unchanged.  (An immutable default is used so it can never be
        altered between calls.)

    Returns:
      tuple: the three products.
    """
    return (hkl_tuple[0] * filter_array[0],
            hkl_tuple[1] * filter_array[1],
            hkl_tuple[2] * filter_array[2])


refl_iota = load('idx-step5_MPIbatch_000064.img_indexed.pickle')

# Distance between the observed and the predicted position of every
# reflection; the columns are fetched once instead of once per row.
xyzobs_column = refl_iota['xyzobs.px.value']
xyzcal_column = refl_iota['xyzcal.px']
iota_dr = [
    (col(xyzobs_column[ii]) - col(xyzcal_column[ii])).length()
    for ii in range(len(refl_iota))
]

print ('Now analyzing: Printing 5-number summary of dR = |robs-rcal|')
print (five_number_summary(iota_dr))
print ('Now plotting histogram of difference in dR = |robs-rcal|')

import matplotlib.pyplot as plt
plt.figure(1)
plt.hist(iota_dr, bins=20)
plt.show()
# Corner and edge length of the square region of interest.
# min_fast is not set in this part of the script -- presumably it is defined
# earlier; confirm before running this section on its own.
min_slow = 540
delta = 100

# The frame identifier comes from the first command-line argument
frame = sys.argv[1]

intensity = dxtbx.load("fft_frame_I_%s.cbf" % frame).get_raw_data()
# The brightness adjustment is computed on the full frame, before cropping
intensity_adjust = followup_brightness_scale(intensity)
intensity = intensity[min_slow:min_slow + delta, min_fast:min_fast + delta]
intensity_adjust = intensity_adjust[min_slow:min_slow + delta, min_fast:min_fast + delta]

phases = dxtbx.load("fft_frame_phase_%s.cbf" % frame).get_raw_data()
phases = phases[min_slow:min_slow + delta, min_fast:min_fast + delta]

fast, slow = intensity.focus()

# Clip level: the median plus five times the inter-quartile range
min_i, q1_i, med_i, q3_i, max_i = five_number_summary(intensity.as_1d())
iqr = (q3_i - q1_i) * 10
max_value = med_i + (iqr / 2)
# print() call form: the statement form is a syntax error on Python 3
print("Cutting I at", max_value)

# Clamp the intensities to [0, max_value] and rescale them to [0, 1]
i = intensity.as_numpy_array()
i[i < 0] = 0
i[i > max_value] = max_value
i = i * (1 / max_value)

# Fold the phases into the range [0, 180)
p = phases.as_numpy_array()
p = p % 180

ones = np.zeros(i.shape) + 1

plt.imshow(intensity_adjust.as_numpy_array(), cmap='gray')
plt.title("Intensities")
def run_stills_pred_param(self, verbose=False):
    """Compare analytical and finite-difference gradients for stills.

    Builds a StillsPredictionParameterisation from this object's stills
    experiments and its detector, beam, crystal orientation and unit cell
    parameterisations, predicts the reflections in place, and then checks
    the analytical gradients from ``get_gradients`` against the
    finite-difference gradients from ``self.get_fd_gradients`` for every
    parameter and for each of dX_dp, dY_dp and dDeltaPsi_dp.

    verbose: when true, print summaries of the errors for each gradient.

    Returns None.
    Raises AssertionError when an absolute or normalised error, or the
    number of rejected outliers, exceeds its hard-coded tolerance.
    """

    def worst_inlier(errors, summary):
        """Return (largest-magnitude inlier, number of outliers).

        Inliers are the values strictly inside the fences placed 1.5
        interquartile ranges below Q1 and above Q3 of ``summary``, the
        five-number summary of ``errors``.
        """
        q1, q3 = summary[1], summary[3]
        spread = q3 - q1
        upper = q3 + 1.5 * spread
        lower = q1 - 1.5 * spread
        keep = (errors < upper) & (errors > lower)
        inliers = errors.select(keep)
        worst = inliers[flex.max_index(flex.abs(inliers))]
        return worst, keep.count(False)

    if verbose:
        print('Testing derivatives for StillsPredictionParameterisation')
        print('========================================================')

    # Build a prediction parameterisation for the stills experiment
    pred_param = StillsPredictionParameterisation(
        self.stills_experiments,
        detector_parameterisations=[self.det_param],
        beam_parameterisations=[self.s0_param],
        xl_orientation_parameterisations=[self.xlo_param],
        xl_unit_cell_parameterisations=[self.xluc_param])

    # Predict the reflections in place. Must do this ahead of calculating
    # the analytical gradients so quantities like s1 are correct
    from dials.algorithms.refinement.prediction import ExperimentsPredictor
    ref_predictor = ExperimentsPredictor(self.stills_experiments)
    ref_predictor(self.reflections)

    # Analytical gradients and their finite-difference counterparts
    an_grads = pred_param.get_gradients(self.reflections)
    fd_grads = self.get_fd_gradients(pred_param, ref_predictor)

    for i, (an_grad, fd_grad) in enumerate(zip(an_grads, fd_grads)):

        # compare FD with analytical calculations
        if verbose:
            print("\nParameter {0}: {1}".format(i, fd_grad['name']))

        for name in ("dX_dp", "dY_dp", "dDeltaPsi_dp"):
            if verbose:
                print(name)
            a = fd_grad[name]
            b = an_grad[name]
            abs_error = a - b

            fns = five_number_summary(abs_error)
            if verbose:
                print((" summary of absolute errors: %9.6f %9.6f %9.6f "
                       "%9.6f %9.6f") % fns)
            # largest absolute error found to be about 0.00025 for dY/dp of
            # Crystal0g_param_3.
            assert flex.max(flex.abs(abs_error)) < 0.0003

            # skip further stats on errors with an iqr of near zero, e.g.
            # dDeltaPsi_dp for detector parameters, which are all equal to
            # zero
            iqr = fns[3] - fns[1]
            if iqr < 1.e-10:
                continue

            # Reject outlying absolute errors and test again.
            tst_val, n_outliers = worst_inlier(abs_error, fns)
            if verbose:
                print((" {0} outliers rejected, leaving greatest "
                       "absolute error: {1:9.6f}").format(n_outliers, tst_val))
            # largest absolute error now 0.000086 for dX/dp of Beam0Mu2
            assert abs(tst_val) < 0.00009

            # Completely skip parameters with FD gradients all zero (e.g.
            # gradients of DeltaPsi for detector parameters)
            fd_is_zero = flex.abs(a) < 1.e-10
            if fd_is_zero.all_eq(True):
                continue

            # otherwise calculate normalised errors, by dividing absolute
            # errors by the IQR (more stable than relative error calculation)
            norm_error = abs_error / iqr
            fns = five_number_summary(norm_error)
            if verbose:
                print((" summary of normalised errors: %9.6f %9.6f %9.6f "
                       "%9.6f %9.6f") % fns)
            # largest normalised error found to be about 25.7 for dY/dp of
            # Crystal0g_param_3.
            max_norm_error = flex.max(flex.abs(norm_error))
            assert max_norm_error < 30, \
                "extreme normalised error value: {0}".format(max_norm_error)

            # Reject outlying normalised errors and test again
            iqr = fns[3] - fns[1]
            if iqr > 0.:
                tst_val, n_outliers = worst_inlier(norm_error, fns)

                # most outliers found for dY/dp of Crystal0g_param_3 (which
                # had largest errors, so no surprise there).
                assert n_outliers < 250, \
                    "too many outliers rejected: {0}".format(n_outliers)

                if verbose:
                    print((" {0} outliers rejected, leaving greatest "
                           "normalised error: {1:9.6f}").format(
                               n_outliers, tst_val))
                # largest normalised error now about -4. for dX/dp of
                # Detector0Tau1
                # NOTE(review): nesting reconstructed from whitespace-collapsed
                # source; this check is assumed to sit inside `if iqr > 0.`,
                # where tst_val refers to the normalised errors -- confirm.
                assert abs(tst_val) < 4.5, \
                    'should be about 4 not %s' % tst_val

    # NOTE(review): assumed to run once after all parameters (indentation was
    # lost in the source). print("") emits the same single newline as a bare
    # Python 2 `print` statement.
    if verbose:
        print("")