def print_stats_on_matches(self):
    l = self.get_matches()
    nref = len(l)
    if nref == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations"
        )
        return
    twotheta_resid = l["2theta_resid"]
    w_2theta = l["2theta.weights"]
    msg = (
        "\nSummary statistics for {} observations".format(nref)
        + " matched to predictions:"
    )
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    row_data = five_number_summary(twotheta_resid)
    rows.append(
        ["2theta_c - 2theta_o (deg)"]
        + ["%.4g" % (e * RAD2DEG) for e in row_data]
    )
    row_data = five_number_summary(w_2theta)
    rows.append(
        ["2theta weights"] + ["%.4g" % (e * DEG2RAD**2) for e in row_data]
    )
    st = simple_table(rows, header)
    logger.info(msg)
    logger.info(st.format())
    logger.info("")

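# NOTE: every snippet collected here follows the same simple_table pattern:
# build a header list and a list of rows of pre-formatted strings, construct
# the table, and render it with format(). A minimal sketch, assuming a
# cctbx/DIALS environment that provides libtbx (the example values below are
# invented for illustration):
from libtbx.table_utils import simple_table

example_header = ["Parameter", "Value"]
example_rows = [["a (A)", "%.3f" % 58.201], ["b (A)", "%.3f" % 58.201]]
example_st = simple_table(example_rows, example_header)
# simple_table also accepts the keyword form used in later snippets:
# simple_table(table_rows=example_rows, column_headers=example_header)
print(example_st.format())  # format() returns the rendered table as a string
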
def run(self):
    results = [self.process(image) for image in self.images]
    header = (
        ["Image", "Num spots"]
        + [str(i + 1) for i, _ in enumerate(self.cmds)]
        + ["Best"]
    )
    rows = []
    for im, res in zip(self.images, results):
        offset = []
        for v in res["offset_deg"]:
            if v is None:
                offset.append("fail")
            else:
                offset.append("{:.3f}".format(v))
        row = [str(e1) + ":" + e2 for e1, e2 in zip(res["nindexed"], offset)]
        row = [im, str(res["nspots"])] + row
        nindexed = res["nindexed"]
        top = max(nindexed)
        best = " "
        for i, _ in enumerate(self.cmds):
            if nindexed[i] == top:
                best += str(i + 1) + " "
        if res["nspots"] == top:
            best += "*"
        rows.append(row + [best])
    st = simple_table(rows, header)
    print(st.format())

def print_stats_on_matches(self):
    l = self.get_matches()
    nref = len(l)
    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary

    twotheta_resid = l["2theta_resid"]
    w_2theta = l["2theta.weights"]
    msg = (
        "\nSummary statistics for {0} observations".format(nref)
        + " matched to predictions:"
    )
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    try:
        row_data = five_number_summary(twotheta_resid)
        rows.append(
            ["2theta_c - 2theta_o (deg)"]
            + ["%.4g" % (e * RAD2DEG) for e in row_data]
        )
        row_data = five_number_summary(w_2theta)
        rows.append(
            ["2theta weights"] + ["%.4g" % (e * DEG2RAD**2) for e in row_data]
        )
        st = simple_table(rows, header)
    except IndexError:  # zero length reflection list
        logger.warning(
            "Unable to calculate summary statistics for zero observations"
        )
        return
    logger.info(msg)
    logger.info(st.format())
    logger.info("")

def minimisation_summary(self):
    """Generate a summary of the model minimisation for output."""
    header = [
        "Intensity range (<Ih>)",
        "n_refl",
        "Uncorrected variance",
        "Corrected variance",
    ]
    rows = []
    bin_bounds = ["%.2f" % i for i in self.binning_info["bin_boundaries"]]
    for i, (initial_var, bin_var, n_refl) in enumerate(
        zip(
            self.binning_info["initial_variances"],
            self.binning_info["bin_variances"],
            self.binning_info["refl_per_bin"],
        )
    ):
        rows.append(
            [
                bin_bounds[i] + " - " + bin_bounds[i + 1],
                str(int(n_refl)),
                str(round(initial_var, 3)),
                str(round(bin_var, 3)),
            ]
        )
    st = simple_table(rows, header)
    return "\n".join(
        (
            "Results of error model refinement. Uncorrected and corrected variances",
            "of normalised intensity deviations for given intensity ranges. Variances",
            "are expected to be ~1.0 for reliable errors (sigmas).",
            st.format(),
            "",
        )
    )

def interpret_results(self):
    """Inspect the data in results_dict, make a table with the mean and
    standard deviation over many attempts and indicate the 'best' option."""
    rows = []
    headers = ["option", ""] + self.results_metadata["names"]
    monitored_values = []

    # Construct the rows, using the metadata from the results dict
    for v in self.results_dict.values():
        config_str = " ".join(v["configuration"])
        vals, stds = [], []
        for i, name in enumerate(self.results_metadata["names"]):
            val, std = self._avg_sd_from_list(v[name])
            vals.append(val)
            stds.append(std)
            if i in self.results_metadata["indices_to_monitor"]:
                monitored_values.append(val)
        rows.append([config_str, "mean"] + [str(i) for i in vals])
        rows.append(["", "std dev"] + [str(i) for i in stds])

    # Now go through monitored values, finding the best and adding a '*'
    n_monitored = len(self.results_metadata["indices_to_monitor"])
    for i in range(n_monitored):
        vals = monitored_values[i::n_monitored]
        if self.results_metadata["best_criterion"][i] == "max":
            best_idx = vals.index(max(vals)) * 2  # *2 to skip std rows
        elif self.results_metadata["best_criterion"][i] == "min":
            best_idx = vals.index(min(vals)) * 2  # *2 to skip std rows
        # the + 2 offsets the first two columns of the table
        rows[best_idx][self.results_metadata["indices_to_monitor"][i] + 2] += "*"

    return simple_table(rows, headers)

def select_highly_connected_reflections(
    Ih_table_block, experiment, min_per_area, n_resolution_bins, print_summary=False
):
    """Select highly connected reflections within a dataset, across resolutions."""
    min_per_bin = min_per_area * 12 * 1.5
    max_per_bin = min_per_area * 12 * 3.0
    assert "s1c" in Ih_table_block.Ih_table
    theta_phi_1 = calc_theta_phi(Ih_table_block.Ih_table["s1c"])
    theta = theta_phi_1.parts()[0]
    phi = theta_phi_1.parts()[1]
    Ih_table_block.Ih_table["phi"] = (phi * 180 / pi) + 180.0
    Ih_table_block.Ih_table["theta"] = theta * 180 / pi
    Ih_table_block.Ih_table = assign_segment_index(Ih_table_block.Ih_table)
    Ih_table_block.setup_binner(
        experiment.crystal.get_unit_cell(),
        experiment.crystal.get_space_group(),
        n_resolution_bins,
    )
    binner = Ih_table_block.binner
    overall_indices = flex.size_t()
    header = ["d-range", "n_refl"] + [str(i) for i in range(0, 12)]
    rows = []
    for ibin in binner.range_all():
        sel = binner.selection(ibin)
        sel_Ih_table_block = Ih_table_block.select(sel)
        indices_wrt_original = Ih_table_block.Ih_table["loc_indices"].select(sel)
        indices, total_in_classes = select_highly_connected_reflections_in_bin(
            sel_Ih_table_block, min_per_area, min_per_bin, max_per_bin
        )
        if indices:
            overall_indices.extend(indices_wrt_original.select(indices))
            d0, d1 = binner.bin_d_range(ibin)
            rows.append(
                [
                    str(round(d0, 3)) + " - " + str(round(d1, 3)),
                    str(int(flex.sum(total_in_classes))),
                ]
                + [str(int(i)) for i in total_in_classes]
            )
    st = simple_table(rows, header)
    msg = """\nSummary of reflection selection algorithm for this dataset:
%s resolution bins, target: at least %s reflections per area,
between %s and %s reflections per resolution bin""" % (
        n_resolution_bins,
        min_per_area,
        18 * min_per_area,
        36 * min_per_area,
    )
    if print_summary:
        logger.info(msg)
        logger.info(st.format())
    else:
        logger.debug(msg)
        logger.debug(st.format())
    return overall_indices

def print_scaling_summary(self, scaling_script):
    """Log summary information after scaling."""
    if ScalingModelObserver().data:
        logger.info(ScalingModelObserver().return_model_error_summary())
    valid_ranges = get_valid_image_ranges(scaling_script.experiments)
    image_ranges = get_image_ranges(scaling_script.experiments)
    msg = []
    for (img, valid, exp) in zip(
        image_ranges, valid_ranges, scaling_script.experiments
    ):
        if valid:
            if len(valid) > 1 or valid[0][0] != img[0] or valid[-1][1] != img[1]:
                msg.append(
                    "Excluded images for experiment identifier: %s, image range: %s, limited range: %s"
                    % (exp.identifier, list(img), list(valid))
                )
    if msg:
        msg = ["Summary of image ranges removed:"] + msg
        logger.info("\n".join(msg))

    # report on partiality of dataset
    partials = flex.double()
    for r in scaling_script.reflections:
        if "partiality" in r:
            partials.extend(r["partiality"])
    not_full_sel = partials < 0.99
    not_zero_sel = partials > 0.01
    gt_half = partials > 0.5
    lt_half = partials < 0.5
    partial_gt_half_sel = not_full_sel & gt_half
    partial_lt_half_sel = not_zero_sel & lt_half
    logger.info("Summary of dataset partialities")
    header = ["Partiality (p)", "n_refl"]
    rows = [
        ["all reflections", str(partials.size())],
        ["p > 0.99", str(not_full_sel.count(False))],
        ["0.5 < p < 0.99", str(partial_gt_half_sel.count(True))],
        ["0.01 < p < 0.5", str(partial_lt_half_sel.count(True))],
        ["p < 0.01", str(not_zero_sel.count(False))],
    ]
    st = simple_table(rows, header)
    logger.info(st.format())
    logger.info(
        """
Reflections below a partiality_cutoff of %s are not considered for any
part of the scaling analysis or for the reporting of merging statistics.
Additionally, if applicable, only reflections with a min_partiality > %s
were considered for use when refining the scaling model.
""",
        scaling_script.params.cut_data.partiality_cutoff,
        scaling_script.params.reflection_selection.min_partiality,
    )
    if MergingStatisticsObserver().data:
        logger.info(
            make_merging_statistics_summary(
                MergingStatisticsObserver().data["statistics"]
            )
        )

def _show_impl(self, out):
    ss, rr, ii, zz = self.data_as_flex_arrays()
    flagged = zz > self.cutoff
    sel_ss = ss.select(flagged)
    sel_z = zz.select(flagged)
    sel_r = rr.select(flagged)
    sel_i = ii.select(flagged)
    out.show_sub_header("Relative Wilson plot")
    out.show_text("""\
The relative Wilson plot compares the mean intensity of the observed data
with the mean intensity computed from the model, as a function of resolution.
This curve is expected to fall off at low resolution if no contribution for
bulk solvent is provided for the calculated intensities, because the presence
of bulk solvent reduces the observed intensities at low resolution by
reducing the contrast. At high resolution, the curve should be a straight
line with a slope that reflects the difference in overall B-factor between
the model and the data. Compared to the normal Wilson plot, the relative
Wilson plot is more linear because the influence of favored distances between
atoms, caused by bonding and secondary structure, is cancelled out.
""")
    out.show_plot(self.table)
    if self.all_bad_z_scores:
        out.warn("""\
All resolution shells have Z-scores above %4.2f sigma. This is indicative of
severe problems with the input data, including processing errors or ice
rings. We recommend checking the logs for data processing and inspecting the
raw images.\n""" % self.cutoff)
    else:
        out.show_text("""\
All relative Wilson plot outliers above %4.2f sigma are reported.
""" % self.cutoff)
    out.newline()
    rows = []
    if len(sel_ss) > 0:
        for s, z, r, i in zip(sel_ss, sel_z, sel_r, sel_i):
            sss = math.sqrt(1.0 / s)
            rows.append(["%8.2f" % sss, "%9.3e" % r, "%9.3e" % i, "%5.2f" % z])
        table = table_utils.simple_table(
            column_headers=[
                "d-spacing",
                "Obs. Log[ratio]",
                "Expected Log[ratio]",
                "Z-score",
            ],
            table_rows=rows,
        )
        out.show_table(table)
    else:
        out.show("The relative Wilson plot doesn't indicate any serious errors.")

def interpret_results(results_dict):
    """Pass in a dict of results. Each item is a different attempt. Expect
    configuration and final_rmsds columns. Score the data and make a table."""
    rows = []
    headers = ["option", "", "Rwork", "Rfree", "CCwork", "CCfree"]
    free_rmsds = []
    free_cc12s = []

    def avg_sd_from_list(lst):
        """Simple function to get the average and standard deviation."""
        arr = flex.double(lst)
        avg = round(flex.mean(arr), 5)
        std = round(arr.standard_deviation_of_the_sample(), 5)
        return avg, std

    for v in results_dict.values():
        config_str = " ".join(v["configuration"])
        avg_work, std_work = avg_sd_from_list(v["Rwork"])
        avg_free, std_free = avg_sd_from_list(v["Rfree"])
        avg_ccwork, std_ccwork = avg_sd_from_list(v["CCwork"])
        avg_ccfree, std_ccfree = avg_sd_from_list(v["CCfree"])
        rows.append(
            [
                config_str,
                "mean",
                str(avg_work),
                str(avg_free),
                str(avg_ccwork),
                str(avg_ccfree),
            ]
        )
        rows.append(
            [
                "",
                "std dev",
                str(std_work),
                str(std_free),
                str(std_ccwork),
                str(std_ccfree),
            ]
        )
        free_rmsds.append(avg_free)
        free_cc12s.append(avg_ccfree)

    # find lowest free rmsd and highest free cc12
    low_rmsd_idx = free_rmsds.index(min(free_rmsds)) * 2  # *2 to skip std rows
    high_cc12_idx = free_cc12s.index(max(free_cc12s)) * 2
    rows[low_rmsd_idx][3] += "*"
    rows[high_cc12_idx][5] += "*"
    st = simple_table(rows, headers)
    logger.info("Summary of the cross validation analysis: \n")
    logger.info(st.format())

def print_stats_on_matches(self):
    """Print some basic statistics on the matches"""
    l = self.get_matches()
    nref = len(l)
    if nref == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations"
        )
        return
    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary

    try:
        x_resid = l["x_resid"]
        y_resid = l["y_resid"]
        delpsi = l["delpsical.rad"]
        w_x, w_y, _ = l["xyzobs.mm.weights"].parts()
        w_delpsi = l["delpsical.weights"]
    except KeyError:
        return
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    row_data = five_number_summary(x_resid)
    rows.append(["Xc - Xo (mm)"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(y_resid)
    rows.append(["Yc - Yo (mm)"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(delpsi)
    rows.append(["DeltaPsi (deg)"] + ["%.4g" % (e * RAD2DEG) for e in row_data])
    row_data = five_number_summary(w_x)
    rows.append(["X weights"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(w_y)
    rows.append(["Y weights"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(w_delpsi)
    rows.append(
        ["DeltaPsi weights"] + ["%.4g" % (e * DEG2RAD**2) for e in row_data]
    )
    msg = (
        "\nSummary statistics for {} observations".format(nref)
        + " matched to predictions:"
    )
    logger.info(msg)
    st = simple_table(rows, header)
    logger.info(st.format())
    logger.info("")

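# NOTE: several reporters here rely on scitbx.math.five_number_summary, which
# returns the minimum, first quartile, median, third quartile and maximum of
# an array -- the column order of the "Min, Q1, Med, Q3, Max" tables above.
# For readers without cctbx, an illustrative pure-Python stand-in (this uses
# Tukey hinges for the quartiles; scitbx's exact quartile convention may
# differ, and the residual values below are invented):
def five_number_summary_demo(data):
    s = sorted(data)
    n = len(s)

    def _median(vals):
        mid = len(vals) // 2
        return vals[mid] if len(vals) % 2 else 0.5 * (vals[mid - 1] + vals[mid])

    lower = s[: (n + 1) // 2]  # lower half, including the median for odd n
    upper = s[n // 2:]  # upper half, including the median for odd n
    return s[0], _median(lower), _median(s), _median(upper), s[-1]


print(
    ["%.4g" % e
     for e in five_number_summary_demo([-0.021, -0.004, 0.0, 0.003, 0.011, 0.034])]
)
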
def run_analysis(flags, reflections):
    """Print a table of flags present in the reflections file"""
    header = ["flag", "nref"]
    rows = []
    for name, val in flags:
        n = (reflections.get_flags(val)).count(True)
        if n > 0:
            rows.append([name, "%d" % n])
    if rows:
        st = simple_table(rows, header)
        print(st.format())
    else:
        print("No flags set")
    return

def analysis(self, reflections):
    """Print a table of flags present in the reflections file"""
    from libtbx.table_utils import simple_table

    header = ["flag", "nref"]
    rows = []
    for name, val in zip(self.flag_names, self.flag_values):
        n = (reflections.get_flags(val)).count(True)
        if n > 0:
            rows.append([name, "%d" % n])
    if len(rows) > 0:
        st = simple_table(rows, header)
        print(st.format())
    else:
        print("No flags set")
    return

def cell_param_table(crystal):
    """Construct a table of cell parameters and their ESDs"""
    from libtbx.table_utils import simple_table

    cell = crystal.get_unit_cell().parameters()
    esd = crystal.get_cell_parameter_sd()
    vol = crystal.get_unit_cell().volume()
    vol_esd = crystal.get_cell_volume_sd()
    header = ["Parameter", "Value", "Estimated sd"]
    rows = []
    names = ["a", "b", "c", "alpha", "beta", "gamma"]
    for n, p, e in zip(names, cell, esd):
        rows.append([n, "%9.5f" % p, "%9.5f" % e])
    rows.append(["\nvolume", "\n%9.5f" % vol, "\n%9.5f" % vol_esd])
    st = simple_table(rows, header)
    return st.format()

def run(self):
    """Execute the script."""
    from dials.util.options import flatten_experiments
    from dials.util import Sorry
    from dials.array_family import flex

    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Try to load the experiments
    if not params.input.experiments:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    experiments = flatten_experiments(params.input.experiments)
    print("{0} experiments loaded".format(len(experiments)))

    us0_vecs = self.extract_us0_vecs(experiments)
    e_vecs = self.extract_rotation_axes(experiments)
    angles = [us0.angle(e, deg=True) for us0, e in zip(us0_vecs, e_vecs)]

    fmt = "{:." + str(params.print_precision) + "f}"
    header = ["Exp\nid", "Beam direction", "Rotation axis", "Angle (deg)"]
    rows = []
    for iexp, (us0, e, ang) in enumerate(zip(us0_vecs, e_vecs, angles)):
        beam_str = " ".join([fmt] * 3).format(*us0.elems)
        e_str = " ".join([fmt] * 3).format(*e.elems)
        rows.append([str(iexp), beam_str, e_str, fmt.format(ang)])
    if len(rows) > 0:
        st = simple_table(rows, header)
        print(st.format())

    # mean and sd
    if len(rows) > 1:
        angles = flex.double(angles)
        mv = flex.mean_and_variance(angles)
        print("Mean and standard deviation of the angle")
        print(
            fmt.format(mv.mean())
            + " +/- "
            + fmt.format(mv.unweighted_sample_standard_deviation())
        )
        print()
    return

def run(self):
    """Execute the script."""
    from dials.util.options import flatten_experiments
    from libtbx.utils import Sorry
    from dials.array_family import flex

    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Try to load the experiments
    if not params.input.experiments:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    experiments = flatten_experiments(params.input.experiments)
    print("{0} experiments loaded".format(len(experiments)))

    us0_vecs = self.extract_us0_vecs(experiments)
    e_vecs = self.extract_rotation_axes(experiments)
    angles = [us0.angle(e, deg=True) for us0, e in zip(us0_vecs, e_vecs)]

    fmt = "{:." + str(params.print_precision) + "f}"
    header = ["Exp\nid", "Beam direction", "Rotation axis", "Angle (deg)"]
    rows = []
    for iexp, (us0, e, ang) in enumerate(zip(us0_vecs, e_vecs, angles)):
        beam_str = " ".join([fmt] * 3).format(*us0.elems)
        e_str = " ".join([fmt] * 3).format(*e.elems)
        rows.append([str(iexp), beam_str, e_str, fmt.format(ang)])
    if len(rows) > 0:
        st = simple_table(rows, header)
        print(st.format())

    # mean and sd
    if len(rows) > 1:
        angles = flex.double(angles)
        mv = flex.mean_and_variance(angles)
        print("Mean and standard deviation of the angle")
        print(
            fmt.format(mv.mean())
            + " +/- "
            + fmt.format(mv.unweighted_sample_standard_deviation())
        )
        print()
    return

def __init__(self, scaler, use_Imid=None):
    if "intensity.prf.value" not in scaler.reflection_table:
        self.max_key = 1
        logger.info(
            "No profile intensities found, skipping profile/summation intensity combination."
        )
        return
    self.scaler = scaler
    self.experiment = scaler.experiment
    if use_Imid is not None:
        self.max_key = use_Imid
    else:
        self.Imids = scaler.params.reflection_selection.combine.Imid
        self.dataset = _make_reflection_table_from_scaler(self.scaler)
        if "partiality" in self.dataset:
            raw_intensities = (
                self.dataset["intensity.sum.value"].as_double()
                / self.dataset["partiality"]
            )
        else:
            raw_intensities = self.dataset["intensity.sum.value"].as_double()
        logger.debug("length of raw intensity array: %s", raw_intensities.size())
        self._determine_Imids(raw_intensities)
        header = ["Combination", "CC1/2", "Rmeas"]
        rows, results = self._test_Imid_combinations()
        st = simple_table(rows, header)
        logger.info(st.format())
        self.max_key = min(results, key=results.get)
        if self.max_key == 0:
            logger.info("Profile intensities determined to be best for scaling. \n")
        elif self.max_key == 1:
            logger.info(
                "Summation intensities determined to be best for scaling. \n"
            )
        else:
            logger.info(
                "Combined intensities with Imid = %s determined to be best for scaling. \n",
                self.max_key,
            )

def print_stats_on_matches(self):
    """Print some basic statistics on the matches"""
    l = self.get_matches()
    nref = len(l)
    if nref == 0:
        logger.warning(
            "Unable to calculate summary statistics for zero observations"
        )
        return
    try:
        x_resid = l["x_resid"]
        y_resid = l["y_resid"]
        phi_resid = l["phi_resid"]
        w_x, w_y, w_phi = l["xyzobs.mm.weights"].parts()
    except KeyError:
        return
    msg = (
        "\nSummary statistics for {} observations".format(nref)
        + " matched to predictions:"
    )
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    row_data = five_number_summary(x_resid)
    rows.append(["Xc - Xo (mm)"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(y_resid)
    rows.append(["Yc - Yo (mm)"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(phi_resid)
    rows.append(["Phic - Phio (deg)"] + ["%.4g" % (e * RAD2DEG) for e in row_data])
    row_data = five_number_summary(w_x)
    rows.append(["X weights"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(w_y)
    rows.append(["Y weights"] + ["%.4g" % e for e in row_data])
    row_data = five_number_summary(w_phi)
    rows.append(["Phi weights"] + ["%.4g" % (e * DEG2RAD**2) for e in row_data])
    st = simple_table(rows, header)
    logger.info(msg)
    logger.info(st.format())
    logger.info("")

def minimisation_summary(self):
    """Output a summary of model minimisation to the logger."""
    header = ["Intensity range (<Ih>)", "n_refl", "variance(norm_dev)"]
    rows = []
    bin_bounds = ["%.2f" % i for i in self.binning_info["bin_boundaries"]]
    for i, (bin_var, n_refl) in enumerate(
        zip(self.binning_info["bin_variances"], self.binning_info["refl_per_bin"])
    ):
        rows.append(
            [
                bin_bounds[i] + " - " + bin_bounds[i + 1],
                str(n_refl),
                str(round(bin_var, 3)),
            ]
        )
    st = simple_table(rows, header)
    logger.info(
        "\n".join(
            (
                "Intensity bins used during error model refinement:",
                st.format(),
                "variance(norm_dev) expected to be ~ 1 for each bin.",
                "",
            )
        )
    )

def print_step_table(refinery):
    """Print useful output about refinement steps in the form of a simple table"""
    logger.info("\nRefinement steps:")
    header = ["Step", "Nref"]
    for (name, units) in zip(refinery._target.rmsd_names, refinery._target.rmsd_units):
        header.append(name + "\n(" + units + ")")
    rows = []
    for i in range(refinery.history.get_nrows()):
        rmsds = [r for r in refinery.history["rmsd"][i]]
        rows.append(
            [str(i), str(refinery.history["num_reflections"][i])]
            + ["%.5g" % r for r in rmsds]
        )
    st = simple_table(rows, header)
    logger.info(st.format())
    logger.info(refinery.history.reason_for_termination)

def print_out_of_sample_rmsd_table(self):
    """Print out-of-sample RMSDs per step, if these were tracked"""
    from libtbx.table_utils import simple_table

    # check if it makes sense to proceed
    if "out_of_sample_rmsd" not in self._refinery.history:
        return
    nref = len(self.get_free_reflections())
    if nref < 10:
        return  # don't do anything if very few refs
    logger.info("\nRMSDs for out-of-sample (free) reflections:")
    rmsd_multipliers = []
    header = ["Step", "Nref"]
    for (name, units) in zip(self._target.rmsd_names, self._target.rmsd_units):
        if units == "mm":
            header.append(name + "\n(mm)")
            rmsd_multipliers.append(1.0)
        elif units == "rad":  # convert radians to degrees for reporting
            header.append(name + "\n(deg)")
            rmsd_multipliers.append(RAD2DEG)
        else:  # leave unknown units alone
            header.append(name + "\n(" + units + ")")
            rmsd_multipliers.append(1.0)  # keep the multiplier list aligned
    rows = []
    for i in range(self._refinery.history.get_nrows()):
        rmsds = [
            r * m
            for r, m in zip(
                self._refinery.history["out_of_sample_rmsd"][i], rmsd_multipliers
            )
        ]
        rows.append([str(i), str(nref)] + ["%.5g" % e for e in rmsds])
    st = simple_table(rows, header)
    logger.info(st.format())
    return

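# NOTE: the refinement reporters above and below use module-level RAD2DEG and
# DEG2RAD conversion constants without showing their definitions. The defining
# module is not part of these excerpts; a minimal sketch consistent with how
# the names are used here:
import math

RAD2DEG = 180.0 / math.pi  # radians -> degrees, for reporting angles
DEG2RAD = math.pi / 180.0  # degrees -> radians
# Angular *weights* are inverse variances, so they transform with the square
# of the conversion factor -- hence the DEG2RAD ** 2 applied to weight columns.
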
def run(self):
    """Execute the script."""
    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)
    try:
        assert len(params.input.reflections) == len(params.input.datablock)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input datablocks"
        )

    datablocks = flatten_datablocks(params.input.datablock)
    reflections = flatten_reflections(params.input.reflections)

    if len(reflections):
        r = self.combine_reflections(reflections)
        # print number of reflections per imageset
        from libtbx.table_utils import simple_table

        max_id = max(r["id"])
        header = ["Imageset", "Nref"]
        nrefs_per_imset = [(r["id"] == i).count(True) for i in range(max_id + 1)]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_imset)]
        st = simple_table(rows, header)
        print(st.format())
        rf = params.output.reflections_filename
        print("Saving combined reflections to {0}".format(rf))
        r.as_pickle(rf)

    if len(datablocks):
        db = self.combine_datablocks(datablocks)
        dbf = params.output.datablocks_filename
        print("Saving combined datablocks to {0}".format(dbf))
        dump = DataBlockDumper(db)
        dump.as_file(dbf, compact=params.output.compact)
    return

def __init__(self, multiscaler):
    self.active_scalers = multiscaler.active_scalers
    self.experiment = multiscaler.experiment
    self.Imids = multiscaler.params.reflection_selection.combine.Imid
    # first copy across relevant data that's needed
    self.good_datasets = []
    for i, scaler in enumerate(self.active_scalers):
        if "intensity.prf.value" in scaler.reflection_table:
            self.good_datasets.append(i)
    self.datasets = [
        _make_reflection_table_from_scaler(self.active_scalers[i])
        for i in self.good_datasets
    ]
    raw_intensities = self._get_raw_intensity_array()
    logger.debug("length of raw intensity array: %s", raw_intensities.size())
    self._determine_Imids(raw_intensities)
    header = ["Combination", "CC1/2", "Rmeas"]
    rows, results = self._test_Imid_combinations()
    st = simple_table(rows, header)
    logger.info(st.format())
    self.max_key = min(results, key=results.get)
    while results[self.max_key] < 0:
        del results[self.max_key]
        self.max_key = min(results, key=results.get)
    if self.max_key == 0:
        logger.info("Profile intensities determined to be best for scaling. \n")
    elif self.max_key == 1:
        logger.info("Summation intensities determined to be best for scaling. \n")
    else:
        logger.info(
            "Combined intensities with Imid = %s determined to be best for scaling. \n",
            self.max_key,
        )

def print_step_table(self):
    """Print useful output about refinement steps in the form of a simple table"""
    from libtbx.table_utils import simple_table

    logger.info("\nRefinement steps:")
    rmsd_multipliers = []
    header = ["Step", "Nref"]
    for (name, units) in zip(self._target.rmsd_names, self._target.rmsd_units):
        if units == "mm":
            header.append(name + "\n(mm)")
            rmsd_multipliers.append(1.0)
        elif units == "rad":  # convert radians to degrees for reporting
            header.append(name + "\n(deg)")
            rmsd_multipliers.append(RAD2DEG)
        else:  # leave unknown units alone
            header.append(name + "\n(" + units + ")")
            rmsd_multipliers.append(1.0)  # keep the multiplier list aligned
    rows = []
    for i in range(self._refinery.history.get_nrows()):
        rmsds = [
            r * m
            for (r, m) in zip(self._refinery.history["rmsd"][i], rmsd_multipliers)
        ]
        rows.append(
            [str(i), str(self._refinery.history["num_reflections"][i])]
            + ["%.5g" % r for r in rmsds]
        )
    st = simple_table(rows, header)
    logger.info(st.format())
    logger.info(self._refinery.history.reason_for_termination)
    return

def run(self):
    print("Parsing input")
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Configure the logging
    log.config(
        params.detector_phase.refinement.verbosity,
        info='dials.refine.log',
        debug='dials.refine.debug.log',
    )

    # Try to obtain the models and data
    if not params.input.experiments:
        raise Sorry("No Experiments found in the input")
    if not params.input.reflections:
        raise Sorry("No reflection data found in the input")
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input experiments"
        )

    # set up global experiments and reflections lists
    from dials.array_family import flex

    reflections = flex.reflection_table()
    global_id = 0
    from dxtbx.model.experiment.experiment_list import ExperimentList

    experiments = ExperimentList()

    if params.reference_detector == "first":
        # Use the first experiment of the first experiment list as the reference detector
        ref_exp = params.input.experiments[0].data[0]
    else:
        # Average all the detectors to generate a reference detector
        assert (
            params.detector_phase.refinement.parameterisation.detector.hierarchy_level
            == 0
        )
        from scitbx.matrix import col

        panel_fasts = []
        panel_slows = []
        panel_oris = []
        for exp_wrapper in params.input.experiments:
            exp = exp_wrapper.data[0]
            if panel_oris:
                for i, panel in enumerate(exp.detector):
                    panel_fasts[i] += col(panel.get_fast_axis())
                    panel_slows[i] += col(panel.get_slow_axis())
                    panel_oris[i] += col(panel.get_origin())
            else:
                for i, panel in enumerate(exp.detector):
                    panel_fasts.append(col(panel.get_fast_axis()))
                    panel_slows.append(col(panel.get_slow_axis()))
                    panel_oris.append(col(panel.get_origin()))

        ref_exp = copy.deepcopy(params.input.experiments[0].data[0])
        for i, panel in enumerate(ref_exp.detector):
            # Averaging the fast and slow axes can make them non-orthogonal.
            # Fix by finding the vector that goes exactly between them and
            # rotating around their cross product, 45 degrees from that vector
            # in either direction.
            vf = panel_fasts[i] / len(params.input.experiments)
            vs = panel_slows[i] / len(params.input.experiments)
            c = vf.cross(vs)
            angle = vf.angle(vs, deg=True)
            v45 = vf.rotate(c, angle / 2, deg=True)
            vf = v45.rotate(c, -45, deg=True)
            vs = v45.rotate(c, 45, deg=True)
            panel.set_frame(vf, vs, panel_oris[i] / len(params.input.experiments))

        print("Reference detector (averaged):", str(ref_exp.detector))

    # set the experiment factory that combines a crystal with the reference
    # beam and the reference detector
    experiment_from_crystal = ExperimentFromCrystal(ref_exp.beam, ref_exp.detector)

    # keep track of the number of refl per accepted experiment for a table
    nrefs_per_exp = []

    # loop through the input, building up the global lists
    for ref_wrapper, exp_wrapper in zip(
        params.input.reflections, params.input.experiments
    ):
        refs = ref_wrapper.data
        exps = exp_wrapper.data

        # there might be multiple experiments already here. Loop through them
        for i, exp in enumerate(exps):

            # select the relevant reflections
            sel = refs['id'] == i
            sub_ref = refs.select(sel)

            ## DGW commented out as reflections.minimum_number_of_reflections no longer exists
            # if len(sub_ref) < params.crystals_phase.refinement.reflections.minimum_number_of_reflections:
            #     print("skipping experiment", i, "in", exp_wrapper.filename,
            #           "due to insufficient strong reflections in", ref_wrapper.filename)
            #     continue

            # build an experiment with this crystal plus the reference models
            combined_exp = experiment_from_crystal(exp.crystal)

            # next experiment ID in series
            exp_id = len(experiments)

            # check this experiment
            if not check_experiment(combined_exp, sub_ref):
                print(
                    "skipping experiment", i, "in", exp_wrapper.filename,
                    "due to poor RMSDs",
                )
                continue

            # set reflections ID
            sub_ref['id'] = flex.int(len(sub_ref), exp_id)

            # keep number of reflections for the table
            nrefs_per_exp.append(len(sub_ref))

            # obtain mm positions on the reference detector
            sub_ref = indexer_base.map_spots_pixel_to_mm_rad(
                sub_ref, combined_exp.detector, combined_exp.scan
            )

            # extend refl and experiments lists
            reflections.extend(sub_ref)
            experiments.append(combined_exp)

    # print number of reflections per accepted experiment
    from libtbx.table_utils import simple_table

    header = ["Experiment", "Nref"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print("Number of reflections per experiment")
    print(st.format())

    for cycle in range(params.n_macrocycles):

        print("MACROCYCLE %02d" % (cycle + 1))
        print("=============\n")

        # first run: multi experiment joint refinement of detector with fixed
        # beam and crystals
        print("PHASE 1")

        # SET THIS TEST TO FALSE TO REFINE WHOLE DETECTOR AS SINGLE JOB
        if params.detector_phase.refinement.parameterisation.detector.hierarchy_level > 0:
            experiments = detector_parallel_refiners(
                params.detector_phase, experiments, reflections
            )
        else:
            experiments = detector_refiner(
                params.detector_phase, experiments, reflections
            )

        # second run
        print("PHASE 2")
        experiments = crystals_refiner(params.crystals_phase, experiments, reflections)

    # Save the refined experiments to file
    output_experiments_filename = params.output.experiments_filename
    print('Saving refined experiments to {0}'.format(output_experiments_filename))
    from dxtbx.model.experiment.experiment_list import ExperimentListDumper

    dump = ExperimentListDumper(experiments)
    dump.as_json(output_experiments_filename)

    # Write out refined reflections, if requested
    if params.output.reflections_filename:
        print(
            'Saving refined reflections to {0}'.format(
                params.output.reflections_filename
            )
        )
        reflections.as_pickle(params.output.reflections_filename)

    return

def run(self):
    """Execute the script."""
    from dials.util.options import flatten_experiments
    from libtbx.utils import Sorry

    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Try to load the models and data
    if len(params.input.experiments) == 0:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if len(params.input.reflections) == 0:
        print("No reflection data found in the input")
        self.parser.print_help()
        return
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input experiments"
        )

    flat_exps = flatten_experiments(params.input.experiments)

    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
        try:
            ref_beam = flat_exps[ref_beam].beam
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
        try:
            ref_goniometer = flat_exps[ref_goniometer].goniometer
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
        try:
            ref_scan = flat_exps[ref_scan].scan
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
        try:
            ref_crystal = flat_exps[ref_crystal].crystal
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
        assert not params.reference_from_experiment.average_detector
        try:
            ref_detector = flat_exps[ref_detector].detector
        except IndexError:
            raise Sorry("{0} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
        # Average all of the detectors together
        from scitbx.matrix import col

        def average_detectors(target, panelgroups, depth):
            # Recursive function to do the averaging
            if (
                params.reference_from_experiment.average_hierarchy_level is None
                or depth == params.reference_from_experiment.average_hierarchy_level
            ):
                n = len(panelgroups)
                sum_fast = col((0.0, 0.0, 0.0))
                sum_slow = col((0.0, 0.0, 0.0))
                sum_ori = col((0.0, 0.0, 0.0))

                # Average the d matrix vectors
                for pg in panelgroups:
                    sum_fast += col(pg.get_local_fast_axis())
                    sum_slow += col(pg.get_local_slow_axis())
                    sum_ori += col(pg.get_local_origin())
                sum_fast /= n
                sum_slow /= n
                sum_ori /= n

                # Re-orthogonalize the slow and the fast vectors by rotating
                # around the cross product
                c = sum_fast.cross(sum_slow)
                a = sum_fast.angle(sum_slow, deg=True) / 2
                sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                target.set_local_frame(sum_fast, sum_slow, sum_ori)

            if target.is_group():
                # Recurse
                for i, target_pg in enumerate(target):
                    average_detectors(
                        target_pg, [pg[i] for pg in panelgroups], depth + 1
                    )

        ref_detector = flat_exps[0].detector
        average_detectors(
            ref_detector.hierarchy(), [e.detector.hierarchy() for e in flat_exps], 0
        )

    combine = CombineWithReference(
        beam=ref_beam,
        goniometer=ref_goniometer,
        scan=ref_scan,
        crystal=ref_crystal,
        detector=ref_detector,
        params=params,
    )

    # set up global experiments and reflections lists
    from dials.array_family import flex

    reflections = flex.reflection_table()
    global_id = 0
    from dxtbx.model.experiment.experiment_list import ExperimentList

    experiments = ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(
        params.input.reflections, params.input.experiments
    ):
        refs = ref_wrapper.data
        exps = exp_wrapper.data
        for i, exp in enumerate(exps):
            sel = refs['id'] == i
            sub_ref = refs.select(sel)
            nrefs_per_exp.append(len(sub_ref))
            sub_ref['id'] = flex.int(len(sub_ref), global_id)
            reflections.extend(sub_ref)
            experiments.append(combine(exp))
            global_id += 1

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table

    header = ["Experiment", "Nref"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a random subset if requested
    if (
        params.output.n_subset is not None
        and len(experiments) > params.output.n_subset
    ):
        import random

        subset_exp = ExperimentList()
        subset_refls = flex.reflection_table()
        n_picked = 0
        indices = list(range(len(experiments)))
        while n_picked < params.output.n_subset:
            idx = indices.pop(random.randint(0, len(indices) - 1))
            subset_exp.append(experiments[idx])
            refls = reflections.select(reflections['id'] == idx)
            refls['id'] = flex.int(len(refls), n_picked)
            subset_refls.extend(refls)
            n_picked += 1
        experiments = subset_exp
        reflections = subset_refls

    # save output
    from dxtbx.model.experiment.experiment_list import ExperimentListDumper

    print(
        'Saving combined experiments to {0}'.format(
            params.output.experiments_filename
        )
    )
    dump = ExperimentListDumper(experiments)
    dump.as_json(params.output.experiments_filename)
    print(
        'Saving combined reflections to {0}'.format(
            params.output.reflections_filename
        )
    )
    reflections.as_pickle(params.output.reflections_filename)
    return

def check_conditions(self, abs_lower_i_threshold=1e-6):
    table_labels = (
        "Operator",
        "# expected systematic absences",
        "<I/sigI> (violations)",
        "# expected non absences",
        "<I/sigI> (violations)",
        "# other reflections",
        "<I/sigI> (violations)",
        "Score",
    )
    for item in [0]:  # absence_class in self.abs_check.absence_classes[self.sg.group().crystal_system()]:
        table_rows = []
        for condition in self.abs_check.absence_classes[
            str(
                sgtbx.space_group_info(
                    group=self.sg.group().build_derived_reflection_intensity_group(
                        False
                    )
                ).as_reference_setting()
            )
        ]:  # crystal_system()]:
            n_abs = 0
            n_n_abs = 0
            n_tot = 0
            n_abs_viol = 0
            n_n_abs_viol = 0
            n_tot_viol = 0
            isi_abs = 0
            isi_n_abs = 0
            isi_tot = 0
            i_abs = 0
            i_n_abs = 0
            i_tot = 0
            score = 0
            for hkl, centric_flag, i, sigi in zip(
                self.miller_array.indices(),
                self.miller_array.centric_flags(),
                self.miller_array.data(),
                self.miller_array.sigmas(),
            ):
                mc, cc = self.abs_check.check(condition, hkl, return_bool=True)
                if abs(i) < abs_lower_i_threshold:
                    sigi = max(sigi, abs_lower_i_threshold)
                if mc:  # mask checks out
                    if cc:  # not absent
                        n_n_abs += 1
                        isi_n_abs += i / sigi
                        i_n_abs += i
                        # should be present: flag if not significant
                        if i / sigi < self.cut:
                            n_n_abs_viol += 1
                        score += likelihood(
                            i, sigi, centric_flag[1], self.sigma_inflation
                        )
                    else:  # absent
                        n_abs += 1
                        isi_abs += i / sigi
                        i_abs += i
                        # should be absent: flag if significant
                        if i / sigi > self.cut:
                            n_abs_viol += 1
                        score += likelihood(i, sigi, None)
                else:
                    n_tot += 1
                    isi_tot += i / sigi
                    i_tot += i
                    if i / sigi < self.cut:
                        n_tot_viol += 1
            if n_abs > 0:
                isi_abs = isi_abs / n_abs
                i_abs = i_abs / n_abs
            if n_n_abs > 0:
                isi_n_abs = isi_n_abs / n_n_abs
                i_n_abs = i_n_abs / n_n_abs
            if n_tot > 0:
                isi_tot = isi_tot / n_tot
                i_tot = i_tot / n_tot
            self.n_abs.append(n_abs)
            self.n_n_abs.append(n_n_abs)
            self.n_tot.append(n_tot)
            self.n_abs_viol.append(n_abs_viol)
            self.n_n_abs_viol.append(n_n_abs_viol)
            self.n_tot_viol.append(n_tot_viol)
            self.isi_abs.append(isi_abs)
            self.isi_n_abs.append(isi_n_abs)
            self.isi_tot.append(isi_tot)
            self.i_abs.append(i_abs)
            self.i_n_abs.append(i_n_abs)
            self.i_tot.append(i_tot)
            self.op_name.append(condition)
            score = float(score) / max(1, n_abs + n_n_abs)
            self.score.append(score)
            table_rows.append(
                [
                    condition,
                    str("%8.0f" % (n_abs)),
                    str(
                        "%8.2f (%i, %4.1f%%)"
                        % (
                            isi_abs,
                            n_abs_viol,
                            100.0 * float(n_abs_viol) / max(1, n_abs),
                        )
                    ),
                    str("%8.0f" % (n_n_abs)),
                    str(
                        "%8.2f (%i, %4.1f%%)"
                        % (
                            isi_n_abs,
                            n_n_abs_viol,
                            100.0 * float(n_n_abs_viol) / max(1, n_n_abs),
                        )
                    ),
                    str("%8.0f" % (n_tot)),
                    str(
                        "%8.2f (%i, %4.1f%%)"
                        % (
                            isi_tot,
                            n_tot_viol,
                            100.0 * float(n_tot_viol) / max(1, n_tot),
                        )
                    ),
                    str("%8.2e" % (abs(score))),
                ]
            )
    self.table = table_utils.simple_table(
        column_headers=table_labels, table_rows=table_rows
    )

def sum_partial_reflections(reflection_table):
    """Sum partial reflections if more than one recording of a reflection is
    present.

    This is a weighted sum for summation integration; weighted average for
    profile fitted reflections. N.B. this will report total partiality for
    the summed reflection.
    """
    nrefl = reflection_table.size()
    intensities = []
    for intensity in ["prf", "scale", "sum"]:
        if "intensity." + intensity + ".value" in reflection_table:
            intensities.append(intensity)

    isel = (reflection_table["partiality"] < 0.99).iselection()
    if not isel:
        return reflection_table

    # create map of partial_id to reflections
    delete = flex.size_t()
    partial_map = defaultdict(list)
    for j in isel:
        partial_map[reflection_table["partial_id"][j]].append(j)

    # now work through this map - get total partiality for every reflection;
    # here only consider reflections with > 1 component
    partial_ids = []
    for p_id in partial_map:
        if len(partial_map[p_id]) > 1:
            partial_ids.append(p_id)

    header = ["Partial id", "Partiality"]
    for i in intensities:
        header.extend([str(i) + " intensity", str(i) + " variance"])
    rows = []

    # now loop through 'matched' partials, summing and then deleting before return
    for p_id in partial_ids:
        j = partial_map[p_id]
        for i in j:
            data = [str(p_id), str(reflection_table["partiality"][i])]
            for intensity in intensities:
                data.extend(
                    [
                        str(reflection_table["intensity." + intensity + ".value"][i]),
                        str(
                            reflection_table["intensity." + intensity + ".variance"][i]
                        ),
                    ]
                )
            rows.append(data)

        # do the summing of the partiality values separately to allow looping
        # over multiple times
        total_partiality = sum([reflection_table["partiality"][i] for i in j])
        if "prf" in intensities:
            reflection_table = _sum_prf_partials(reflection_table, j)
        if "sum" in intensities:
            reflection_table = _sum_sum_partials(reflection_table, j)
        if "scale" in intensities:
            reflection_table = _sum_scale_partials(reflection_table, j)
        # FIXME now that the partials have been summed, should fractioncalc be
        # set to one (except for summation case?)
        reflection_table["partiality"][j[0]] = total_partiality
        delete.extend(flex.size_t(j[1:]))
        data = ["combined " + str(p_id), str(total_partiality)]
        for intensity in intensities:
            data.extend(
                [
                    str(reflection_table["intensity." + intensity + ".value"][j[0]]),
                    str(
                        reflection_table["intensity." + intensity + ".variance"][j[0]]
                    ),
                ]
            )
        rows.append(data)

    reflection_table.del_selected(delete)
    if nrefl > reflection_table.size():
        logger.info(
            "Combined %s partial reflections with other partial reflections"
            % (nrefl - reflection_table.size())
        )
    logger.debug("\nSummary of combination of partial reflections")
    st = simple_table(rows, header)
    logger.debug(st.format())
    return reflection_table

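# NOTE: sum_partial_reflections above describes "a weighted sum for summation
# integration; weighted average for profile fitted reflections", delegating
# the actual work to _sum_prf_partials and friends, which are not shown in
# this excerpt. As a sketch of the idea only: one common choice for combining
# repeated measurements is the inverse-variance weighted average below.
# Whether _sum_prf_partials uses exactly this weighting is an assumption; it
# operates on flex arrays in the reflection table, while plain floats are
# used here.
def inverse_variance_weighted_average(values, variances):
    weights = [1.0 / v for v in variances]
    total_weight = sum(weights)
    avg = sum(w * x for w, x in zip(weights, values)) / total_weight
    return avg, 1.0 / total_weight  # combined value and its variance


# e.g. two partial recordings of one reflection:
print(inverse_variance_weighted_average([100.0, 110.0], [25.0, 36.0]))
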
def __call__(self, reflections):
    """Identify outliers in the input and set the centroid_outlier flag.
    Return True if any outliers were detected, otherwise False."""
    if self._verbosity > 0:
        logger.info(
            "Detecting centroid outliers using the {0} algorithm".format(
                type(self).__name__
            )
        )

    # check the columns are present
    for col in self._cols:
        assert col in reflections

    sel = reflections.get_flags(reflections.flags.used_in_refinement)
    all_data = reflections.select(sel)
    all_data_indices = sel.iselection()
    nexp = flex.max(all_data["id"]) + 1

    jobs = []
    if self._separate_experiments:
        # split the data set by experiment id
        for iexp in range(nexp):
            sel = all_data["id"] == iexp
            job = {
                "id": iexp,
                "panel": "all",
                "data": all_data.select(sel),
                "indices": all_data_indices.select(sel),
            }
            jobs.append(job)
    else:
        # keep the whole dataset across all experiment ids
        job = {
            "id": "all",
            "panel": "all",
            "data": all_data,
            "indices": all_data_indices,
        }
        jobs.append(job)

    jobs2 = []
    if self._separate_panels:
        # split further by panel id
        for job in jobs:
            data = job["data"]
            iexp = job["id"]
            indices = job["indices"]
            for ipanel in range(flex.max(data["panel"]) + 1):
                sel = data["panel"] == ipanel
                job = {
                    "id": iexp,
                    "panel": ipanel,
                    "data": data.select(sel),
                    "indices": indices.select(sel),
                }
                jobs2.append(job)
    else:
        # keep the splits as they are
        jobs2 = jobs

    jobs3 = []
    if self.get_block_width() is not None:
        # split into equal-sized phi ranges
        for job in jobs2:
            data = job["data"]
            iexp = job["id"]
            ipanel = job["panel"]
            indices = job["indices"]
            phi = data["xyzobs.mm.value"].parts()[2]
            if len(phi) == 0:
                # detect no data in the job
                jobs3.append(job)
                continue
            phi_low = flex.min(phi)
            phi_range = flex.max(phi) - phi_low
            if phi_range == 0.0:
                # detect stills and do not split
                jobs3.append(job)
                continue
            bw = self.get_block_width(iexp)
            if bw is None:
                # detect no split for this experiment
                jobs3.append(job)
                continue
            nblocks = int(round(RAD2DEG * phi_range / bw))
            nblocks = max(1, nblocks)
            real_width = phi_range / nblocks
            block_end = 0.0
            for iblock in range(nblocks - 1):  # all except the last block
                block_start = iblock * real_width
                block_end = (iblock + 1) * real_width
                sel = (phi >= (phi_low + block_start)) & (
                    phi < (phi_low + block_end)
                )
                job = {
                    "id": iexp,
                    "panel": ipanel,
                    "data": data.select(sel),
                    "indices": indices.select(sel),
                    "phi_start": RAD2DEG * (phi_low + block_start),
                    "phi_end": RAD2DEG * (phi_low + block_end),
                }
                jobs3.append(job)
            # now last block
            sel = phi >= (phi_low + block_end)
            job = {
                "id": iexp,
                "panel": ipanel,
                "data": data.select(sel),
                "indices": indices.select(sel),
                "phi_start": RAD2DEG * (phi_low + block_end),
                "phi_end": RAD2DEG * (phi_low + phi_range),
            }
            jobs3.append(job)
    else:
        # keep the splits as they are
        jobs3 = jobs2

    # Work out the format of the jobs table
    if self._verbosity > 0:
        header = ["Job"]
        if self._separate_experiments:
            header.append("Exp\nid")
        if self._separate_panels:
            header.append("Panel\nid")
        if self.get_block_width() is not None:
            header.append("Block range\n(deg)")
        header.extend(["Nref", "Nout", "%out"])
        rows = []

    # now loop over the lowest level of splits
    for i, job in enumerate(jobs3):

        data = job["data"]
        indices = job["indices"]
        iexp = job["id"]
        ipanel = job["panel"]
        nref = len(indices)

        if nref >= self._min_num_obs:

            # get the subset of data as a list of columns
            cols = [data[col] for col in self._cols]

            # determine the position of outliers on this sub-dataset
            outliers = self._detect_outliers(cols)

            # get positions of outliers from the original matches
            ioutliers = indices.select(outliers)

        elif nref > 0:
            # too few reflections in the job
            msg = "For job {0}, fewer than {1} reflections are present.".format(
                i + 1, self._min_num_obs
            )
            msg += " All reflections flagged as possible outliers."
            if self._verbosity > 0:
                logger.debug(msg)
            ioutliers = indices

        else:
            # no reflections in the job
            ioutliers = indices

    # set the centroid_outlier flag in the original reflection table
        nout = len(ioutliers)
        if nout > 0:
            reflections.set_flags(ioutliers, reflections.flags.centroid_outlier)
            self.nreject += nout

        # Add job data to the table
        if self._verbosity > 0:
            row = [str(i + 1)]
            if self._separate_experiments:
                row.append(str(iexp))
            if self._separate_panels:
                row.append(str(ipanel))
            if self.get_block_width() is not None:
                try:
                    row.append("{phi_start:.2f} - {phi_end:.2f}".format(**job))
                except KeyError:
                    row.append("{0:.2f} - {1:.2f}".format(0.0, 0.0))
            if nref == 0:
                p100 = 0
            else:
                p100 = nout / nref * 100.0
                if p100 > 30.0:
                    msg = (
                        "{0:3.1f}% of reflections were flagged as outliers from job"
                        " {1}"
                    ).format(p100, i + 1)
                    # note jobs with a high fraction of outliers
                    logger.debug(msg)
            row.extend([str(nref), str(nout), "%3.1f" % p100])
            rows.append(row)

    if self.nreject == 0:
        return False
    if self._verbosity > 0:
        logger.info(
            "{0} reflections have been flagged as outliers".format(self.nreject)
        )
        logger.debug("Outlier rejections per job:")
        st = simple_table(rows, header)
        logger.debug(st.format())

    return True

def print_exp_rmsd_table(self):
    """Print useful output about refinement steps in the form of a simple table"""
    from libtbx.table_utils import simple_table

    logger.info("\nRMSDs by experiment:")
    header = ["Exp\nid", "Nref"]
    for (name, units) in zip(self._target.rmsd_names, self._target.rmsd_units):
        if (name == "RMSD_X" or name == "RMSD_Y") and units == "mm":
            header.append(name + "\n(px)")
        elif name == "RMSD_Phi" and units == "rad":
            # will convert radians to images for reporting of scans
            header.append("RMSD_Z" + "\n(images)")
        elif units == "rad":
            # will convert other angles in radians to degrees (e.g. for
            # RMSD_DeltaPsi and RMSD_2theta)
            header.append(name + "\n(deg)")
        else:
            # skip other/unknown RMSDs
            pass

    rows = []
    for iexp, exp in enumerate(self._experiments):
        detector = exp.detector
        px_sizes = [p.get_pixel_size() for p in detector]
        it = iter(px_sizes)
        px_size = next(it)
        if not all(tst == px_size for tst in it):
            logger.info(
                "The detector in experiment %d does not have the same pixel "
                + "sizes on each panel. Skipping...",
                iexp,
            )
            continue
        px_per_mm = [1.0 / e for e in px_size]

        scan = exp.scan
        try:
            images_per_rad = 1.0 / abs(scan.get_oscillation(deg=False)[1])
        except (AttributeError, ZeroDivisionError):
            images_per_rad = None

        raw_rmsds = self._target.rmsds_for_experiment(iexp)
        if raw_rmsds is None:
            continue  # skip experiments where rmsd cannot be calculated
        num = self._target.get_num_matches_for_experiment(iexp)
        rmsds = []
        for (name, units, rmsd) in zip(
            self._target.rmsd_names, self._target.rmsd_units, raw_rmsds
        ):
            if name == "RMSD_X" and units == "mm":
                rmsds.append(rmsd * px_per_mm[0])
            elif name == "RMSD_Y" and units == "mm":
                rmsds.append(rmsd * px_per_mm[1])
            elif name == "RMSD_Phi" and units == "rad":
                rmsds.append(rmsd * images_per_rad)
            elif units == "rad":
                rmsds.append(rmsd * RAD2DEG)
        rows.append([str(iexp), str(num)] + ["%.5g" % r for r in rmsds])

    if len(rows) > 0:
        st = simple_table(rows, header)
        logger.info(st.format())
    return

def run(self):
    print("Parsing input")
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Configure the logging
    log.config(
        params.detector_phase.refinement.verbosity,
        info='dials.refine.log',
        debug='dials.refine.debug.log',
    )

    # Try to obtain the models and data
    if not params.input.experiments:
        raise Sorry("No Experiments found in the input")
    if not params.input.reflections:
        raise Sorry("No reflection data found in the input")
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input experiments"
        )

    # set up global experiments and reflections lists
    from dials.array_family import flex

    reflections = flex.reflection_table()
    global_id = 0
    from dxtbx.model.experiment_list import ExperimentList

    experiments = ExperimentList()

    if params.reference_detector == "first":
        # Use the first experiment of the first experiment list as the reference detector
        ref_exp = params.input.experiments[0].data[0]
    else:
        # Average all the detectors to generate a reference detector
        assert (
            params.detector_phase.refinement.parameterisation.detector.hierarchy_level
            == 0
        )
        from scitbx.matrix import col

        panel_fasts = []
        panel_slows = []
        panel_oris = []
        for exp_wrapper in params.input.experiments:
            exp = exp_wrapper.data[0]
            if panel_oris:
                for i, panel in enumerate(exp.detector):
                    panel_fasts[i] += col(panel.get_fast_axis())
                    panel_slows[i] += col(panel.get_slow_axis())
                    panel_oris[i] += col(panel.get_origin())
            else:
                for i, panel in enumerate(exp.detector):
                    panel_fasts.append(col(panel.get_fast_axis()))
                    panel_slows.append(col(panel.get_slow_axis()))
                    panel_oris.append(col(panel.get_origin()))

        ref_exp = copy.deepcopy(params.input.experiments[0].data[0])
        for i, panel in enumerate(ref_exp.detector):
            # Averaging the fast and slow axes can make them non-orthogonal.
            # Fix by finding the vector that goes exactly between them and
            # rotating around their cross product, 45 degrees from that vector
            # in either direction.
            vf = panel_fasts[i] / len(params.input.experiments)
            vs = panel_slows[i] / len(params.input.experiments)
            c = vf.cross(vs)
            angle = vf.angle(vs, deg=True)
            v45 = vf.rotate(c, angle / 2, deg=True)
            vf = v45.rotate(c, -45, deg=True)
            vs = v45.rotate(c, 45, deg=True)
            panel.set_frame(vf, vs, panel_oris[i] / len(params.input.experiments))

        print("Reference detector (averaged):", str(ref_exp.detector))

    # set the experiment factory that combines a crystal with the reference
    # beam and the reference detector
    experiment_from_crystal = ExperimentFromCrystal(ref_exp.beam, ref_exp.detector)

    # keep track of the number of refl per accepted experiment for a table
    nrefs_per_exp = []

    # loop through the input, building up the global lists
    for ref_wrapper, exp_wrapper in zip(
        params.input.reflections, params.input.experiments
    ):
        refs = ref_wrapper.data
        exps = exp_wrapper.data

        # there might be multiple experiments already here. Loop through them
        for i, exp in enumerate(exps):

            # select the relevant reflections
            sel = refs['id'] == i
            sub_ref = refs.select(sel)

            ## DGW commented out as reflections.minimum_number_of_reflections no longer exists
            # if len(sub_ref) < params.crystals_phase.refinement.reflections.minimum_number_of_reflections:
            #     print("skipping experiment", i, "in", exp_wrapper.filename,
            #           "due to insufficient strong reflections in", ref_wrapper.filename)
            #     continue

            # build an experiment with this crystal plus the reference models
            combined_exp = experiment_from_crystal(exp.crystal)

            # next experiment ID in series
            exp_id = len(experiments)

            # check this experiment
            if not check_experiment(combined_exp, sub_ref):
                print(
                    "skipping experiment", i, "in", exp_wrapper.filename,
                    "due to poor RMSDs",
                )
                continue

            # set reflections ID
            sub_ref['id'] = flex.int(len(sub_ref), exp_id)

            # keep number of reflections for the table
            nrefs_per_exp.append(len(sub_ref))

            # obtain mm positions on the reference detector
            sub_ref = indexer_base.map_spots_pixel_to_mm_rad(
                sub_ref, combined_exp.detector, combined_exp.scan
            )

            # extend refl and experiments lists
            reflections.extend(sub_ref)
            experiments.append(combined_exp)

    # print number of reflections per accepted experiment
    from libtbx.table_utils import simple_table

    header = ["Experiment", "Nref"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print("Number of reflections per experiment")
    print(st.format())

    for cycle in range(params.n_macrocycles):

        print("MACROCYCLE %02d" % (cycle + 1))
        print("=============\n")

        # first run: multi experiment joint refinement of detector with fixed
        # beam and crystals
        print("PHASE 1")

        # SET THIS TEST TO FALSE TO REFINE WHOLE DETECTOR AS SINGLE JOB
        if params.detector_phase.refinement.parameterisation.detector.hierarchy_level > 0:
            experiments = detector_parallel_refiners(
                params.detector_phase, experiments, reflections
            )
        else:
            experiments = detector_refiner(
                params.detector_phase, experiments, reflections
            )

        # second run
        print("PHASE 2")
        experiments = crystals_refiner(params.crystals_phase, experiments, reflections)

    # Save the refined experiments to file
    output_experiments_filename = params.output.experiments_filename
    print('Saving refined experiments to {0}'.format(output_experiments_filename))
    from dxtbx.model.experiment_list import ExperimentListDumper

    dump = ExperimentListDumper(experiments)
    dump.as_json(output_experiments_filename)

    # Write out refined reflections, if requested
    if params.output.reflections_filename:
        print(
            'Saving refined reflections to {0}'.format(
                params.output.reflections_filename
            )
        )
        reflections.as_pickle(params.output.reflections_filename)

    return

def _show_impl (self, out) :
    out.show_header("SAD experiment planning")
    out.show_sub_header(
        "Dataset overall I/sigma required to solve a structure")
    self.show_characteristics(out=out)

    out.show_preformatted_text("""
-------Targets for entire dataset-------  ----------Likely outcome-----------""")

    if (len(self.table_rows) == 0) :
        out.show_text("SAD solution unlikely with the given parameters.")
        return

    if (not out.gui_output) :
        out.show_preformatted_text("""
                                   Anomalous    Useful    Useful
                     Half-dataset   Anom CC    Anomalous
 Dmin   N   I/sigI  sigF/F    CC  (cc*_anom)    Signal  P(Substr)   FOM
             (%)                                  (%)
""")
        for row in self.table_rows :
            out.show_preformatted_text(
                "%s%s%s%s %s %s %s %s %s" % tuple(row))
    else :
        table = table_utils.simple_table(
            table_rows=self.table_rows,
            column_headers=["d_min", "N", "I/sigI", "sigF/F (%)",
                            "Half-dataset CC_ano", "CC*_ano",
                            "Anom. signal", "P(Substr)", "FOM"])
        out.show_table(table)

    (dmin, nsites, nrefl, fpp, i_over_sigma, sigf, cc_half_weak, cc_half,
     cc_ano_weak, cc_ano, s_ano, solved, fom) = tuple(
        self.representative_values)

    if self.missed_target_resolutions:
        self.missed_target_resolutions.sort()
        extra_note = ""
        if self.used_max_i_over_sigma:
            extra_note = "I/sigma shown is value of max_i_over_sigma."
        elif not self.input_i_over_sigma:
            extra_note = "I/sigma shown achieves about %3.0f%% of \nmaximum anomalous signal." % (
                self.ratio_for_failure * 100.)
        out.show_text("""
Note: Target anomalous signal not achievable with tested I/sigma (up to %d)
for resolutions of %5.2f A and lower. %s
""" % (int(self.max_i_over_sigma), self.missed_target_resolutions[0],
       extra_note))

    if self.skipped_resolutions:
        self.skipped_resolutions.sort()
        out.show_text("""
Note: No plausible values of I/sigma found for resolutions of %5.2f A
and lower.
""" % (self.skipped_resolutions[0]))

    out.show_text("""
This table says that if you collect your data to a resolution of %5.1f A
with an overall <I>/<sigma> of about %3.0f then the half-dataset anomalous
correlation should be about %5.2f (typically within a factor of 2). This
should lead to a correlation of your anomalous data to true anomalous
differences (CC*_ano) of about %5.2f, and a useful anomalous signal around
%3.0f (again within a factor of about two). With this value of estimated
anomalous signal the probability of finding the anomalous substructure is
about %3d%% (based on estimated anomalous signal and actual outcomes for
real structures), and the estimated figure of merit of phasing is %3.2f.""" % (
        dmin, i_over_sigma, cc_half, cc_ano, s_ano, int(solved), fom))

    out.show_text("""
The value of sigF/F (actually rms(sigF)/rms(F)) is approximately the inverse
of I/sigma. The calculations are based on rms(sigF)/rms(F).

Note that these values assume data measured with little radiation damage or
at least with anomalous pairs measured close in time. The values also assume
that the anomalously-scattering atoms are nearly as well-ordered as other
atoms. If your crystal does not fit these assumptions it may be necessary to
collect data with even higher I/sigma than indicated here.

Note also that anomalous signal is roughly proportional to the anomalous
structure factors at a given resolution. That means that if you have 50%
occupancy of your anomalous atoms, the signal will be 50% of what it
otherwise would be. Also it means that if your anomalously scattering atoms
only contribute to 5 A, you should only consider data to 5 A in this
analysis.
""")

    out.show_paragraph_header("""What to do next:""")
    out.show_text("""
1. Collect your data, trying to obtain a value of I/sigma for the whole
   dataset at least as high as your target.""")
    out.show_text("""\
2. Scale and analyze your unmerged data with phenix.scale_and_merge to get
   accurate scaled and merged data as well as two half-dataset data files
   that can be used to estimate the quality of your data.""")
    out.show_text("""\
3. Analyze your anomalous data (the scaled merged data and the two
   half-dataset data files) with phenix.anomalous_signal to estimate the
   anomalous signal in your data. This tool will again guess the fraction of
   the substructure that can be obtained with your data, this time with
   knowledge of the actual anomalous signal. It will also estimate the
   figure of merit of phasing that you can obtain once you solve the
   substructure.""")
    out.show_text("""\
4. Compare the anomalous signal in your measured data with the estimated
   values in the table above. If they are lower than expected you may need
   to collect more data to obtain the target anomalous signal.""")
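# Hedged aside (not part of the program above): the claim that sigF/F is
# roughly the inverse of I/sigma follows from first-order error propagation.
# With I = F**2 we get sigI = 2*F*sigF, so per reflection sigF/F = 0.5*sigI/I;
# the rms-based, dataset-wide quantity quoted above absorbs roughly a factor
# of two. The helper below is illustrative only.
def approx_sigf_over_f(i_over_sigi):
    """Approximate per-reflection sigF/F from I/sigI (strong-data limit)."""
    return 0.5 / i_over_sigi

print("%.3f" % approx_sigf_over_f(20.0))  # 0.025, i.e. 2.5%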
def _show_impl (self, out) :
    assert (self.b_cart is not None)
    out.show_sub_header("Maximum likelihood anisotropic Wilson scaling")

    out.show("ML estimate of overall B_cart value:")
    out.show_preformatted_text("""\
  %5.2f, %5.2f, %5.2f
  %12.2f, %5.2f
  %19.2f
""" % (self.b_cart[0], self.b_cart[3], self.b_cart[4],
       self.b_cart[1], self.b_cart[5],
       self.b_cart[2]))

    out.show("Equivalent representation as U_cif:")
    out.show_preformatted_text("""\
  %5.2f, %5.2f, %5.2f
  %12.2f, %5.2f
  %19.2f
""" % (self.u_cif[0], self.u_cif[3], self.u_cif[4],
       self.u_cif[1], self.u_cif[5],
       self.u_cif[2]))

    out.show("Eigen analyses of B-cart:")

    def format_it (x, format="%3.2f"):
        xx = format % (x)
        if x > 0:
            xx = " " + xx
        return (xx)

    rows = [
        ["1",
         format_it(self.eigen_values[0], "%5.3f"),
         "(%s, %s, %s)" % (format_it(self.eigen_vectors[0]),
                           format_it(self.eigen_vectors[1]),
                           format_it(self.eigen_vectors[2]))],
        ["2",
         format_it(self.eigen_values[1], "%5.3f"),
         "(%s, %s, %s)" % (format_it(self.eigen_vectors[3]),
                           format_it(self.eigen_vectors[4]),
                           format_it(self.eigen_vectors[5]))],
        ["3",
         format_it(self.eigen_values[2], "%5.3f"),
         "(%s, %s, %s)" % (format_it(self.eigen_vectors[6]),
                           format_it(self.eigen_vectors[7]),
                           format_it(self.eigen_vectors[8]))],
    ]
    table = table_utils.simple_table(
        column_headers=["Eigenvector", "Value", "Vector"],
        table_rows=rows)
    out.show_table(table)

    out.show("ML estimate of -log of scale factor:")
    out.show_preformatted_text("  %5.2f" % (self.p_scale))

    out.show_sub_header("Anisotropy analyses")
    if (self.eigen_values[0] == 0) :
        raise Sorry(
            "Fatal error: eigenvector 1 of the overall anisotropic "
            "B-factor B_cart is zero. This "
            "may indicate severe problems with the input data, for instance "
            "if only a single plane through reciprocal space is present.")

    # ani_rat_p = self.aniso_ratio_p_value(self.anirat)
    # if ani_rat_p < 0:
    #     ani_rat_p = 0.0
    # out.show_preformatted_text("""\
    # Anisotropy ( [MaxAnisoB-MinAnisoB]/[MaxAnisoB] ) : %7.3e
    #   Anisotropic ratio p-value                      : %7.3e
    # """ % (self.anirat, ani_rat_p))
    # out.show("""
    # The p-value is a measure of the severity of anisotropy as observed in
    # the PDB. The p-value of %5.3e indicates that roughly %4.1f %% of
    # datasets available in the PDB have an anisotropy equal to or worse
    # than this dataset.""" % (ani_rat_p, 100.0*math.exp(-ani_rat_p)))

    message = """indicates that there probably is no significant systematic
noise amplification."""
    if (self.z_tot is not None) and (self.z_tot > self.z_level) :
        if self.mean_isigi_high_correction_factor < self.level:
            message = """indicates that there probably is significant
systematic noise amplification that could possibly lead to artefacts in the
maps or difficulties in refinement"""
        else:
            message = """indicates that there probably is some systematic
dependence between the anisotropy and not-so-well-defined intensities.
Because the signal to noise for the most affected intensities is relatively
good, the effect on maps or refinement behavior is most likely not very
serious."""

    if (self.mean_count is not None) :
        out.show("""
For the resolution shell spanning between %4.2f - %4.2f Angstrom, the mean
I/sigI is equal to %5.2f. %4.1f %% of these intensities have an I/sigI > 3.
When sorting these intensities by their anisotropic correction factor and
analysing the I/sigI behavior for this ordered list, we can gauge the
presence of 'anisotropy induced noise amplification' in reciprocal space.
""" % (self.max_d, self.min_d, self.mean_isigi, 100.0 * self.mean_count))

        out.show("""\
The quarter of intensities *least* affected by the anisotropy correction show
""")
        out.show_preformatted_text("""\
  <I/sigI>                 :   %5.2e
  Fraction of I/sigI > 3   :   %5.2e   ( Z = %8.2f )""" % (
            self.mean_isigi_low_correction_factor,
            self.frac_below_low_correction,
            self.z_low))

        out.show("""\
The quarter of intensities *most* affected by the anisotropy correction show
""")
        out.show_preformatted_text("""\
  <I/sigI>                 :   %5.2e
  Fraction of I/sigI > 3   :   %5.2e   ( Z = %8.2f )""" % (
            self.mean_isigi_high_correction_factor,
            self.frac_below_high_correction,
            self.z_high))

        # out.show(""" The combined Z-score of %8.2f %s""" % (self.z_tot,
        #     message))

        out.show("""\
Z-scores are computed on the basis of a Bernoulli model assuming independence
of weak reflections with respect to anisotropy.""")
def __call__(self, reflections):
    """Identify outliers in the input and set the centroid_outlier flag.
    Return True if any outliers were detected, otherwise False"""

    # check the columns are present
    for col in self._cols:
        assert col in reflections

    sel = reflections.get_flags(reflections.flags.used_in_refinement)
    all_data = reflections.select(sel)
    all_data_indices = sel.iselection()
    nexp = flex.max(all_data['id']) + 1

    jobs = []
    if self._separate_experiments:
        # split the data set by experiment id
        for iexp in xrange(nexp):
            sel = all_data['id'] == iexp
            job = {'id': iexp, 'panel': 'all',
                   'data': all_data.select(sel),
                   'indices': all_data_indices.select(sel)}
            jobs.append(job)
    else:
        # keep the whole dataset across all experiment ids
        job = {'id': 'all', 'panel': 'all',
               'data': all_data,
               'indices': all_data_indices}
        jobs.append(job)

    jobs2 = []
    if self._separate_panels:
        # split further by panel id
        for job in jobs:
            data = job['data']
            iexp = job['id']
            indices = job['indices']
            for ipanel in xrange(flex.max(data['panel']) + 1):
                sel = data['panel'] == ipanel
                job2 = {'id': iexp, 'panel': ipanel,
                        'data': data.select(sel),
                        'indices': indices.select(sel)}
                jobs2.append(job2)
    else:
        # keep the splits as they are
        jobs2 = jobs

    # now loop over the lowest level of splits
    for job in jobs2:

        data = job['data']
        indices = job['indices']
        iexp = job['id']
        ipanel = job['panel']

        if len(indices) >= self._min_num_obs:

            # get the subset of data as a list of columns
            cols = [data[col] for col in self._cols]

            # determine the position of outliers on this sub-dataset
            outliers = self._detect_outliers(cols)

            # get positions of outliers from the original matches
            ioutliers = indices.select(outliers)

        else:
            msg = "For experiment: {0} and panel: {1}, ".format(iexp, ipanel)
            msg += "only {0} reflections are present. ".format(len(indices))
            msg += "All of these are flagged as possible outliers."
            if self._verbosity > 0:
                debug(msg)
            ioutliers = indices

        # set those reflections as outliers in the original reflection table
        reflections.set_flags(ioutliers, reflections.flags.centroid_outlier)
        self.nreject += len(ioutliers)

    if self.nreject == 0:
        return False

    if self._verbosity > 0:
        info("{0} reflections have been flagged as outliers".format(
            self.nreject))

    if nexp > 1 and self._verbosity > 0:
        # table of rejections per experiment
        from libtbx.table_utils import simple_table
        header = ["Exp\nid", "Nref", "Nout", "%out"]
        rows = []
        outlier_sel = reflections.get_flags(
            reflections.flags.centroid_outlier)
        outliers = reflections.select(outlier_sel)
        for iexp in xrange(nexp):
            nref = (reflections['id'] == iexp).count(True)
            nout = (outliers['id'] == iexp).count(True)
            if nref == 0:
                p100 = 0
                msg = ("No reflections associated with"
                       " Experiment with id {0}").format(iexp)
                warning(msg)
            else:
                # 100.0 first, so this stays a float division under Python 2
                p100 = 100.0 * nout / nref
            if p100 > 30.0:
                msg = ("{0:3.1f}% of reflections were flagged as outliers from the"
                       " Experiment with id {1}").format(p100, iexp)
                warning(msg)
            rows.append(["%d" % iexp, "%d" % nref, "%d" % nout,
                         "%3.1f" % p100])
        st = simple_table(rows, header)
        debug("Outlier rejections per experiment:")
        debug(st.format())

    return True
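# _detect_outliers is supplied by subclasses. For illustration only (numpy
# standing in for flex arrays), a Tukey-fence detector over residual columns
# could look like this; a generic sketch, not the DIALS implementation.
import numpy as np

def detect_outliers_tukey(cols, iqr_multiplier=1.5):
    """Flag a row as an outlier if any column lies outside the Tukey fences."""
    outliers = np.zeros(len(cols[0]), dtype=bool)
    for col in cols:
        q1, q3 = np.percentile(col, [25, 75])
        fence = iqr_multiplier * (q3 - q1)
        outliers |= (col < q1 - fence) | (col > q3 + fence)
    return outliers

x_resid = np.array([0.01, -0.02, 0.00, 0.03, 5.0])
y_resid = np.array([0.02, 0.01, 0.00, 0.02, 0.1])
print(detect_outliers_tukey([x_resid, y_resid]))  # only the last row flagged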
def run_with_preparsed(self, params, options):
    """Run combine_experiments, but allow passing in of parameters"""
    from dials.util.options import flatten_experiments

    # Try to load the models and data
    if len(params.input.experiments) == 0:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if len(params.input.reflections) == 0:
        print("No reflection data found in the input")
        self.parser.print_help()
        return
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input experiments")

    flat_exps = flatten_experiments(params.input.experiments)

    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
        try:
            ref_beam = flat_exps[ref_beam].beam
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
        try:
            ref_goniometer = flat_exps[ref_goniometer].goniometer
        except IndexError:
            raise Sorry(
                "{} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
        try:
            ref_scan = flat_exps[ref_scan].scan
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
        try:
            ref_crystal = flat_exps[ref_crystal].crystal
        except IndexError:
            raise Sorry(
                "{} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
        assert not params.reference_from_experiment.average_detector
        try:
            ref_detector = flat_exps[ref_detector].detector
        except IndexError:
            raise Sorry(
                "{} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
        # Average all of the detectors together
        from scitbx.matrix import col

        def average_detectors(target, panelgroups, depth):
            # Recursive function to do the averaging
            if (params.reference_from_experiment.average_hierarchy_level is None
                    or depth == params.reference_from_experiment.average_hierarchy_level):
                n = len(panelgroups)
                sum_fast = col((0.0, 0.0, 0.0))
                sum_slow = col((0.0, 0.0, 0.0))
                sum_ori = col((0.0, 0.0, 0.0))

                # Average the d matrix vectors
                for pg in panelgroups:
                    sum_fast += col(pg.get_local_fast_axis())
                    sum_slow += col(pg.get_local_slow_axis())
                    sum_ori += col(pg.get_local_origin())
                sum_fast /= n
                sum_slow /= n
                sum_ori /= n

                # Re-orthogonalize the slow and the fast vectors by rotating
                # around the cross product
                c = sum_fast.cross(sum_slow)
                a = sum_fast.angle(sum_slow, deg=True) / 2
                sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                target.set_local_frame(sum_fast, sum_slow, sum_ori)

            if target.is_group():
                # Recurse
                for i, target_pg in enumerate(target):
                    average_detectors(target_pg,
                                      [pg[i] for pg in panelgroups],
                                      depth + 1)

        ref_detector = flat_exps[0].detector
        average_detectors(ref_detector.hierarchy(),
                          [e.detector.hierarchy() for e in flat_exps], 0)

    combine = CombineWithReference(
        beam=ref_beam,
        goniometer=ref_goniometer,
        scan=ref_scan,
        crystal=ref_crystal,
        detector=ref_detector,
        params=params,
    )

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    skipped_expts = 0
    from dxtbx.model.experiment_list import ExperimentList
    experiments = ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
        refs = ref_wrapper.data
        exps = exp_wrapper.data
        for i, exp in enumerate(exps):
            sel = refs["id"] == i
            sub_ref = refs.select(sel)
            n_sub_ref = len(sub_ref)
            if (params.output.min_reflections_per_experiment is not None
                    and n_sub_ref < params.output.min_reflections_per_experiment):
                skipped_expts += 1
                continue
            nrefs_per_exp.append(n_sub_ref)
            sub_ref["id"] = flex.int(len(sub_ref), global_id)
            if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                del sub_ref["shoebox"]
            reflections.extend(sub_ref)
            try:
                experiments.append(combine(exp))
            except ComparisonError as e:
                # When we failed tolerance checks, give a useful error message
                (path, index) = find_experiment_in(exp,
                                                   params.input.experiments)
                raise Sorry(
                    "Model didn't match reference within required tolerance for experiment {} in {}:"
                    "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                    .format(index, path, str(e)))
            global_id += 1

    if (params.output.min_reflections_per_experiment is not None
            and skipped_expts > 0):
        print("Removed {0} experiments with fewer than {1} reflections".format(
            skipped_expts, params.output.min_reflections_per_experiment))

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Number of reflections"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a random subset if requested
    if (params.output.n_subset is not None
            and len(experiments) > params.output.n_subset):
        subset_exp = ExperimentList()
        subset_refls = flex.reflection_table()
        if params.output.n_subset_method == "random":
            n_picked = 0
            indices = list(range(len(experiments)))
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
            print("Selecting a random subset of {0} experiments out of {1} total."
                  .format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "n_refl":
            if params.output.n_refl_panel_list is None:
                refls_subset = reflections
            else:
                sel = flex.bool(len(reflections), False)
                for p in params.output.n_refl_panel_list:
                    sel |= reflections["panel"] == p
                refls_subset = reflections.select(sel)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(
                    sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
            print("Selecting a subset of {0} experiments with highest number "
                  "of reflections out of {1} total."
                  .format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "significance_filter":
            from dials.algorithms.integration.stills_significance_filter import (
                SignificanceFilter,
            )
            params.output.significance_filter.enable = True
            sig_filter = SignificanceFilter(params.output)
            refls_subset = sig_filter(experiments, reflections)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(
                    sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
        experiments = subset_exp
        reflections = subset_refls

    def save_in_batches(experiments, reflections, exp_name, refl_name,
                        batch_size=1000):
        from dxtbx.command_line.image_average import splitit
        for i, indices in enumerate(
                splitit(list(range(len(experiments))),
                        (len(experiments) // batch_size) + 1)):
            batch_expts = ExperimentList()
            batch_refls = flex.reflection_table()
            for sub_id, sub_idx in enumerate(indices):
                batch_expts.append(experiments[sub_idx])
                sub_refls = reflections.select(reflections["id"] == sub_idx)
                sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                batch_refls.extend(sub_refls)
            exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
            ref_filename = os.path.splitext(refl_name)[0] + "_%03d.refl" % i
            self._save_output(batch_expts, batch_refls, exp_filename,
                              ref_filename)

    def combine_in_clusters(experiments_l, reflections_l, exp_name,
                            refl_name, end_count):
        result = []
        for cluster, experiment in enumerate(experiments_l):
            cluster_expts = ExperimentList()
            cluster_refls = flex.reflection_table()
            for i, expts in enumerate(experiment):
                refls = reflections_l[cluster][i]
                refls["id"] = flex.int(len(refls), i)
                cluster_expts.append(expts)
                cluster_refls.extend(refls)
            exp_filename = os.path.splitext(exp_name)[0] + (
                "_cluster%d.expt" % (end_count - cluster))
            ref_filename = os.path.splitext(refl_name)[0] + (
                "_cluster%d.refl" % (end_count - cluster))
            result.append(
                (cluster_expts, cluster_refls, exp_filename, ref_filename))
        return result

    # cluster the resulting experiments if requested
    if params.clustering.use:
        clustered = Cluster(
            experiments,
            reflections,
            dendrogram=params.clustering.dendrogram,
            threshold=params.clustering.threshold,
            n_max=params.clustering.max_crystals,
        )
        n_clusters = len(clustered.clustered_frames)

        def not_too_many(keeps):
            if params.clustering.max_clusters is not None:
                return len(keeps) < params.clustering.max_clusters
            return True

        keep_frames = []
        sorted_keys = sorted(clustered.clustered_frames.keys())
        while len(clustered.clustered_frames) > 0 and not_too_many(keep_frames):
            keep_frames.append(
                clustered.clustered_frames.pop(sorted_keys.pop(-1)))
        if params.clustering.exclude_single_crystal_clusters:
            keep_frames = [k for k in keep_frames if len(k) > 1]
        clustered_experiments = [[f.experiment for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        clustered_reflections = [[f.reflections for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        list_of_combined = combine_in_clusters(
            clustered_experiments,
            clustered_reflections,
            params.output.experiments_filename,
            params.output.reflections_filename,
            n_clusters,
        )
        for saveable_tuple in list_of_combined:
            if params.output.max_batch_size is None:
                self._save_output(*saveable_tuple)
            else:
                save_in_batches(*saveable_tuple,
                                batch_size=params.output.max_batch_size)
    else:
        if params.output.max_batch_size is None:
            self._save_output(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
            )
        else:
            save_in_batches(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                batch_size=params.output.max_batch_size,
            )
    return
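# save_in_batches relies on dxtbx's splitit to divide experiment indices into
# near-equal chunks. A standalone sketch of that idea (split_evenly is a
# hypothetical stand-in, not the dxtbx function):
def split_evenly(items, n_chunks):
    """Split items into n_chunks contiguous, near-equal-length pieces."""
    n_chunks = max(1, min(n_chunks, len(items)))
    size, remainder = divmod(len(items), n_chunks)
    chunks, start = [], 0
    for i in range(n_chunks):
        end = start + size + (1 if i < remainder else 0)
        chunks.append(items[start:end])
        start = end
    return chunks

print(split_evenly(list(range(10)), 3))  # [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]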
def __init__(self, miller_array, threshold=3, protein=True, print_all=True,
             sigma_inflation=1.0, original_data=None):
    # respect the caller's cut-off (was hard-coded to 3.0, ignoring the argument)
    self.threshold = threshold
    assert miller_array.is_xray_intensity_array()
    self.miller_array = miller_array.deep_copy().f_sq_as_f(
        ).average_bijvoet_mates().f_as_f_sq().map_to_asu()
    space_group = self.miller_array.space_group()

    self.absences_table = analyze_absences(
        miller_array=self.miller_array,
        isigi_cut=threshold,
        sigma_inflation=sigma_inflation)

    if (original_data is not None):
        self.absences_list = absences_list(obs=original_data,
                                           was_filtered=False)
    else:
        self.absences_list = absences_list(obs=self.miller_array,
                                           was_filtered=True)

    self.sg_iterator = sgi_iterator(
        chiral=True,
        intensity_symmetry=space_group.build_derived_reflection_intensity_group(False))

    self.sg_choices = []
    self.mean_i = []
    self.mean_isigi = []
    self.n = []
    self.violations = []
    self.abs_types = []
    self.tuple_score = []
    score = []

    for sg in self.sg_iterator.list():
        xs = crystal.symmetry(unit_cell=self.miller_array.unit_cell(),
                              space_group=sg.group())
        tmp_miller = self.miller_array.customized_copy(crystal_symmetry=xs)
        these_absent_millers = tmp_miller.select(
            tmp_miller.sys_absent_flags().data())

        if these_absent_millers.data().size() > 0:
            tmp_mean_i = flex.mean(these_absent_millers.data())
            zero_sel = these_absent_millers.sigmas() == 0
            these_absent_millers = these_absent_millers.select(~zero_sel)
            #print sg, list(these_absent_millers.indices()), list(these_absent_millers.data())
            tmp_mean_isigi = flex.mean(
                these_absent_millers.data() / these_absent_millers.sigmas())
            tmp_n = these_absent_millers.data().size()
            tmp_violations = flex.bool(
                these_absent_millers.data() / these_absent_millers.sigmas()
                > self.threshold).count(True)
        else:
            tmp_mean_i = 0
            tmp_mean_isigi = 0
            tmp_n = 0
            tmp_violations = 0

        to_be_checked = []
        for s in sg.group():
            # check if this is an operator that causes absences
            tmp = conditions_for_operator(s)
            if tmp.absence_type() != "None":
                if tmp.absence_type() in self.absences_table.op_name:
                    ii = self.absences_table.op_name.index(tmp.absence_type())
                    if tmp.absence_type() not in to_be_checked:
                        if tmp.absence_type() in equivs:
                            if equivs[tmp.absence_type()] not in to_be_checked:
                                to_be_checked.append(tmp.absence_type())
                                tmp_score = self.absences_table.score[ii]
                        else:
                            to_be_checked.append(tmp.absence_type())
                            tmp_score = self.absences_table.score[ii]

        self.abs_types.append(to_be_checked)
        tuple_score = self.absences_table.propose(to_be_checked)
        self.tuple_score.append(tuple_score)

        self.sg_choices.append(sg)
        self.mean_i.append(tmp_mean_i)
        self.mean_isigi.append(tmp_mean_isigi)
        self.n.append(tmp_n)
        self.violations.append(tmp_violations)

    tmp_rows = self.suggest_likely_candidates()
    self.sorted_table = table_utils.simple_table(
        column_headers=['space group', '# absent', '<Z>_absent',
                        '<Z/sigZ>_absent', '+++', '---', 'score'],
        table_rows=tmp_rows)
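# The violation count above is simply the number of nominally-absent
# reflections that are still significantly observed. Schematically (plain
# Python rather than flex arrays, illustration only):
def count_violations(intensities, sigmas, threshold=3.0):
    """Count 'absent' reflections with I/sigI above the cut-off."""
    return sum(1 for i, s in zip(intensities, sigmas)
               if s > 0 and i / s > threshold)

print(count_violations([0.5, 12.0, -1.0], [1.0, 2.0, 1.0]))  # 1 (12/2 = 6 > 3)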
def run_sys_abs_checks(experiments, reflections, d_min=None,
                       significance_level=0.95):
    """Check for systematic absences in the data for the Laue group.

    Select the good data, merge, test screw axes and score possible space
    groups. The crystals are updated with the most likely space group.
    """
    if ("inverse_scale_factor" in reflections[0]
            and "intensity.scale.value" in reflections[0]):
        logger.info("Attempting to perform absence checks on scaled data")
        reflections = filter_reflection_table(
            reflections[0], intensity_choice=["scale"], d_min=d_min)
        reflections["intensity"] = reflections["intensity.scale.value"]
        reflections["variance"] = reflections["intensity.scale.variance"]
    else:
        logger.info(
            "Attempting to perform absence checks on unscaled profile-integrated data")
        reflections = filter_reflection_table(
            reflections[0], intensity_choice=["profile"], d_min=d_min)
        reflections["intensity"] = reflections["intensity.prf.value"]
        reflections["variance"] = reflections["intensity.prf.variance"]

    # now merge
    space_group = experiments[0].crystal.get_space_group()
    reflections["asu_miller_index"] = map_indices_to_asu(
        reflections["miller_index"], space_group)
    reflections["inverse_scale_factor"] = flex.double(reflections.size(), 1.0)
    merged = (
        _reflection_table_to_iobs(
            reflections, experiments[0].crystal.get_unit_cell(), space_group)
        .merge_equivalents(use_internal_variance=False)
        .array())
    merged_reflections = flex.reflection_table()
    merged_reflections["intensity"] = merged.data()
    merged_reflections["variance"] = merged.sigmas() ** 2
    merged_reflections["miller_index"] = merged.indices()

    # Get the Laue class from the space group.
    laue_group = str(space_group.build_derived_patterson_group().info())
    logger.info("Laue group: %s", laue_group)
    if laue_group not in laue_groups:
        logger.info("No absences to check for this Laue group")
        return

    # Score the screw axes.
    screw_axes, screw_axis_scores = score_screw_axes(
        laue_groups[laue_group], merged_reflections, significance_level)

    logger.info(
        simple_table(
            [
                [
                    a.name,
                    "%.3f" % score,
                    str(a.n_refl_used[0]),
                    str(a.n_refl_used[1]),
                    "%.3f" % a.mean_I,
                    "%.3f" % a.mean_I_abs,
                    "%.3f" % a.mean_I_sigma,
                    "%.3f" % a.mean_I_sigma_abs,
                ]
                for a, score in zip(screw_axes, screw_axis_scores)
            ],
            column_headers=[
                "Screw axis",
                "Score",
                "No. present",
                "No. absent",
                "<I> present",
                "<I> absent",
                "<I/sig> present",
                "<I/sig> absent",
            ],
        ).format()
    )

    # Score the space groups from the screw axis scores.
    space_groups, scores = score_space_groups(
        screw_axis_scores, laue_groups[laue_group])
    logger.info(
        simple_table(
            [[sg, "%.4f" % score] for sg, score in zip(space_groups, scores)],
            column_headers=["Space group", "score"],
        ).format()
    )

    # Find the best space group and update the experiments.
    best_sg = space_groups[scores.index(max(scores))]
    logger.info("Recommended space group: %s", best_sg)
    if "enantiomorphic pairs" in laue_groups[laue_group]:
        if best_sg in laue_groups[laue_group]["enantiomorphic pairs"]:
            logger.info(
                "Space group with equivalent score (enantiomorphic pair): %s",
                laue_groups[laue_group]["enantiomorphic pairs"][best_sg],
            )
    new_sg = sgtbx.space_group_info(symbol=best_sg).group()
    for experiment in experiments:
        experiment.crystal.set_space_group(new_sg)
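# Screw-axis scoring rests on reflection conditions such as: for a 2(1) screw
# axis along c, axial reflections (0,0,l) should be present only for even l.
# A minimal sketch of that partitioning step (illustrative only, far simpler
# than the dials scorer, which weighs intensities at a significance level):
def partition_00l(miller_indices, fold=2):
    """Split (0,0,l) reflections into screw-axis-allowed and forbidden sets."""
    present, absent = [], []
    for h, k, l in miller_indices:
        if h == 0 and k == 0:
            (present if l % fold == 0 else absent).append((h, k, l))
    return present, absent

print(partition_00l([(0, 0, 1), (0, 0, 2), (1, 0, 3)]))
# ([(0, 0, 2)], [(0, 0, 1)])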
def run(self): """Execute the script.""" # Parse the command line self.params, _ = self.parser.parse_args(show_diff_phil=True) if not self.params.input.experiments: self.parser.print_help() sys.exit() # Try to load the models experiments = flatten_experiments(self.params.input.experiments) nexp = len(experiments) if nexp == 0: self.parser.print_help() sys.exit("No Experiments found in the input") # Set up a plot if requested if self.params.plot_filename: plt.figure() header = [ "Image", "Beam direction (xyz)", "Zone axis [uvw]", "Angle from\nprevious (deg)", ] for iexp, exp in enumerate(experiments): print("For Experiment id = {}".format(iexp)) print(exp.beam) print(exp.crystal) print(exp.scan) if self.params.scale == "ewald_sphere_radius": scale = 1.0 / exp.beam.get_wavelength() elif self.params.scale == "max_cell": uc = exp.crystal.get_unit_cell() scale = max(uc.parameters()[0:3]) else: scale = 1.0 print("Beam direction scaled by {0} = {1:.3f} to " "calculate zone axis\n".format(self.params.scale, scale)) dat = extract_experiment_data(exp, scale) images = dat["images"] directions = dat["directions"] zone_axes = dat["zone_axes"] # calculate the orientation offset between each image offset = [ e1.angle(e2, deg=True) for e1, e2 in zip(zone_axes[:-1], zone_axes[1:]) ] str_off = ["---"] + ["{:.8f}".format(e) for e in offset] rows = [] for i, d, z, a in zip(images, directions, zone_axes, str_off): row = [ str(i), "{:.8f} {:.8f} {:.8f}".format(*d.elems), "{:.8f} {:.8f} {:.8f}".format(*z.elems), a, ] rows.append(row) # Print the table st = simple_table(rows, header) print(st.format()) # Add to the plot, if requested if self.params.plot_filename: plt.scatter(images[1:], offset, s=1) # Finish and save plot, if requested if self.params.plot_filename: plt.xlabel("Image number") plt.ylabel(r"Angle from previous image $\left(^\circ\right)$") plt.title(r"Angle between neighbouring images") print("Saving plot to {}".format(self.params.plot_filename)) plt.savefig(self.params.plot_filename) print()
def __call__(self, reflections):
    """Identify outliers in the input and set the centroid_outlier flag.
    Return True if any outliers were detected, otherwise False"""

    if self._verbosity > 0:
        logger.info("Detecting centroid outliers using the {0} algorithm".format(
            type(self).__name__))

    # check the columns are present
    for col in self._cols:
        assert col in reflections

    sel = reflections.get_flags(reflections.flags.used_in_refinement)
    all_data = reflections.select(sel)
    all_data_indices = sel.iselection()
    nexp = flex.max(all_data['id']) + 1

    jobs = []
    if self._separate_experiments:
        # split the data set by experiment id
        for iexp in xrange(nexp):
            sel = all_data['id'] == iexp
            job = {'id': iexp, 'panel': 'all',
                   'data': all_data.select(sel),
                   'indices': all_data_indices.select(sel)}
            jobs.append(job)
    else:
        # keep the whole dataset across all experiment ids
        job = {'id': 'all', 'panel': 'all',
               'data': all_data,
               'indices': all_data_indices}
        jobs.append(job)

    jobs2 = []
    if self._separate_panels:
        # split further by panel id
        for job in jobs:
            data = job['data']
            iexp = job['id']
            indices = job['indices']
            for ipanel in xrange(flex.max(data['panel']) + 1):
                sel = data['panel'] == ipanel
                job2 = {'id': iexp, 'panel': ipanel,
                        'data': data.select(sel),
                        'indices': indices.select(sel)}
                jobs2.append(job2)
    else:
        # keep the splits as they are
        jobs2 = jobs

    jobs3 = []
    if self.get_block_width() is not None:
        # split into equal-sized phi ranges
        for job in jobs2:
            data = job['data']
            iexp = job['id']
            ipanel = job['panel']
            indices = job['indices']
            phi = data['xyzobs.mm.value'].parts()[2]
            if len(phi) == 0:
                # detect no data in the job
                jobs3.append(job)
                continue
            phi_low = flex.min(phi)
            phi_range = flex.max(phi) - phi_low
            if phi_range == 0.0:
                # detect stills and do not split
                jobs3.append(job)
                continue
            bw = self.get_block_width(iexp)
            if bw is None:
                # detect no split for this experiment
                jobs3.append(job)
                continue
            nblocks = int(round(RAD2DEG * phi_range / bw))
            nblocks = max(1, nblocks)
            real_width = phi_range / nblocks
            block_end = 0.0
            for iblock in xrange(nblocks - 1):  # all except the last block
                block_start = iblock * real_width
                block_end = (iblock + 1) * real_width
                sel = ((phi >= (phi_low + block_start)) &
                       (phi < (phi_low + block_end)))
                job3 = {'id': iexp, 'panel': ipanel,
                        'data': data.select(sel),
                        'indices': indices.select(sel),
                        'phi_start': RAD2DEG * (phi_low + block_start),
                        'phi_end': RAD2DEG * (phi_low + block_end)}
                jobs3.append(job3)
            # now last block
            sel = phi >= (phi_low + block_end)
            job3 = {'id': iexp, 'panel': ipanel,
                    'data': data.select(sel),
                    'indices': indices.select(sel),
                    'phi_start': RAD2DEG * (phi_low + block_end),
                    'phi_end': RAD2DEG * (phi_low + phi_range)}
            jobs3.append(job3)
    else:
        # keep the splits as they are
        jobs3 = jobs2

    # Work out the format of the jobs table
    if self._verbosity > 0:
        header = ['Job']
        if self._separate_experiments:
            header.append('Exp\nid')
        if self._separate_panels:
            header.append('Panel\nid')
        if self.get_block_width() is not None:
            header.append('Block range\n(deg)')
        header.extend(['Nref', 'Nout', '%out'])
        rows = []

    # now loop over the lowest level of splits
    for i, job in enumerate(jobs3):

        data = job['data']
        indices = job['indices']
        iexp = job['id']
        ipanel = job['panel']
        nref = len(indices)

        if nref >= self._min_num_obs:

            # get the subset of data as a list of columns
            cols = [data[col] for col in self._cols]

            # determine the position of outliers on this sub-dataset
            outliers = self._detect_outliers(cols)

            # get positions of outliers from the original matches
            ioutliers = indices.select(outliers)

        elif nref > 0:
            # too few reflections in the job
            msg = "For job {0}, fewer than {1} reflections are present.".format(
                i + 1, self._min_num_obs)
            msg += " All reflections are flagged as possible outliers."
            if self._verbosity > 0:
                logger.debug(msg)
            ioutliers = indices

        else:
            # no reflections in the job
            ioutliers = indices

        # set the centroid_outlier flag in the original reflection table
        nout = len(ioutliers)
        if nout > 0:
            reflections.set_flags(ioutliers,
                                  reflections.flags.centroid_outlier)
            self.nreject += nout

        # Add job data to the table
        if self._verbosity > 0:
            row = [str(i + 1)]
            if self._separate_experiments:
                row.append(str(iexp))
            if self._separate_panels:
                row.append(str(ipanel))
            if self.get_block_width() is not None:
                try:
                    row.append('{phi_start:.2f} - {phi_end:.2f}'.format(**job))
                except KeyError:
                    row.append('{0:.2f} - {1:.2f}'.format(0.0, 0.0))
            if nref == 0:
                p100 = 0
            else:
                # 100.0 first, so this stays a float division under Python 2
                p100 = 100.0 * nout / nref
                if p100 > 30.0:
                    msg = ("{0:3.1f}% of reflections were flagged as outliers from job"
                           " {1}").format(p100, i + 1)
                    # this message was built but never emitted; log it
                    logger.warning(msg)
            row.extend([str(nref), str(nout), '%3.1f' % p100])
            rows.append(row)

    if self.nreject == 0:
        return False
    if self._verbosity > 0:
        logger.info("{0} reflections have been flagged as outliers".format(
            self.nreject))
        logger.debug("Outlier rejections per job:")
        st = simple_table(rows, header)
        logger.debug(st.format())

    return True
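# The phi-blocking above rounds the scan range to the nearest whole number of
# blocks of roughly the requested width, then cuts the range evenly. A
# condensed sketch of that arithmetic (degrees in and out, whereas the code
# above works in radians; phi_blocks is our name, and the real code keeps
# half-open intervals with the remainder folded into the last block):
def phi_blocks(phi_low, phi_range, block_width_deg):
    """Return (start, end) phi limits for near-equal blocks of ~block_width."""
    nblocks = max(1, int(round(phi_range / block_width_deg)))
    width = phi_range / nblocks
    return [(phi_low + i * width, phi_low + (i + 1) * width)
            for i in range(nblocks)]

print(phi_blocks(0.0, 90.0, 30.0))
# [(0.0, 30.0), (30.0, 60.0), (60.0, 90.0)]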
def print_panel_rmsd_table(self):
    """print useful output about refinement steps in the form of a simple
    table"""

    from libtbx.table_utils import simple_table

    if len(self._experiments.scans()) > 1:
        logger.warning(
            "Multiple scans present. Only the first scan will be used "
            "to determine the image width for reporting RMSDs")
    scan = self._experiments.scans()[0]
    try:
        images_per_rad = 1.0 / abs(scan.get_oscillation(deg=False)[1])
    except AttributeError:
        images_per_rad = None

    for idetector, detector in enumerate(self._experiments.detectors()):
        if len(detector) == 1:
            continue
        logger.info("\nDetector {} RMSDs by panel:".format(idetector + 1))

        header = ["Panel\nid", "Nref"]
        for (name, units) in zip(self._target.rmsd_names,
                                 self._target.rmsd_units):
            # parenthesised so the units test applies to both X and Y
            if (name == "RMSD_X" or name == "RMSD_Y") and units == "mm":
                header.append(name + "\n(px)")
            elif (name == "RMSD_Phi" and units == "rad"):
                # convert radians to images for reporting of scans
                header.append("RMSD_Z" + "\n(images)")
            elif (name == "RMSD_DeltaPsi" and units == "rad"):
                # convert radians to degrees for reporting of stills
                header.append(name + "\n(deg)")
            else:
                # skip RMSDs that cannot be expressed in image/scan space
                pass

        rows = []
        for ipanel, panel in enumerate(detector):

            px_size = panel.get_pixel_size()
            px_per_mm = [1.0 / e for e in px_size]
            num = self._target.get_num_matches_for_panel(ipanel)
            if num <= 0:
                continue
            raw_rmsds = self._target.rmsds_for_panel(ipanel)
            if raw_rmsds is None:
                continue  # skip panels where rmsd cannot be calculated
            rmsds = []
            for (name, units, rmsd) in zip(self._target.rmsd_names,
                                           self._target.rmsd_units,
                                           raw_rmsds):
                if name == "RMSD_X" and units == "mm":
                    rmsds.append(rmsd * px_per_mm[0])
                elif name == "RMSD_Y" and units == "mm":
                    rmsds.append(rmsd * px_per_mm[1])
                elif name == "RMSD_Phi" and units == "rad":
                    rmsds.append(rmsd * images_per_rad)
                elif name == "RMSD_DeltaPsi" and units == "rad":
                    rmsds.append(rmsd * RAD2DEG)
            rows.append([str(ipanel), str(num)] +
                        ["%.5g" % r for r in rmsds])

        if len(rows) > 0:
            st = simple_table(rows, header)
            logger.info(st.format())

    return
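# Unit handling in the table above, with hypothetical numbers: positional
# RMSDs in mm are divided by the pixel size to give pixels, and angular RMSDs
# in radians are scaled by images-per-radian from the oscillation width.
RAD2DEG = 180.0 / 3.141592653589793

rmsd_x_mm = 0.0172        # positional RMSD (mm)
pixel_size_mm = 0.172     # panel pixel size (mm)
print(rmsd_x_mm / pixel_size_mm)  # ~0.1 px

rmsd_phi_rad = 0.001      # angular RMSD (rad)
osc_width_deg = 0.1       # image width (deg)
images_per_rad = 1.0 / (osc_width_deg / RAD2DEG)
print(rmsd_phi_rad * images_per_rad)  # ~0.573 images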
def print_stats_on_matches(self):
    """Print some basic statistics on the matches"""

    l = self.get_matches()
    nref = len(l)

    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary
    x_resid = l['x_resid']
    y_resid = l['y_resid']
    delpsi = l['delpsical.rad']
    w_x, w_y, _ = l['xyzobs.mm.weights'].parts()
    w_delpsi = l['delpsical.weights']

    msg = "\nSummary statistics for {0} observations".format(nref) +\
          " matched to predictions:"
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    try:
        row_data = five_number_summary(x_resid)
        rows.append(["Xc - Xo (mm)"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(y_resid)
        rows.append(["Yc - Yo (mm)"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(delpsi)
        rows.append(["DeltaPsi (deg)"] + ["%.4g" % (e * RAD2DEG)
                                          for e in row_data])
        row_data = five_number_summary(w_x)
        rows.append(["X weights"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(w_y)
        rows.append(["Y weights"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(w_delpsi)
        rows.append(["DeltaPsi weights"] + ["%.4g" % (e * DEG2RAD**2)
                                            for e in row_data])
    except IndexError:
        # zero length reflection list
        logger.warning(
            "Unable to calculate summary statistics for zero observations")
        return
    logger.info(msg)
    st = simple_table(rows, header)
    logger.info(st.format())
    logger.info("")

    # sorting is expensive and the following table is only of interest in
    # special cases, so return now if verbosity is not high
    if self._verbosity < 3:
        return

    if nref < 20:
        logger.debug("Fewer than 20 reflections matched!")
        return

    sl = self._sort_obs_by_residual(l)
    logger.debug("Reflections with the worst 20 positional residuals:")
    header = ['Miller index', 'x_resid', 'y_resid', 'pnl',
              'x_obs', 'y_obs', 'x_obs\nweight', 'y_obs\nweight']
    rows = []
    for i in xrange(20):
        e = sl[i]
        x_obs, y_obs, _ = e['xyzobs.mm.value']
        rows.append(['% 3d, % 3d, % 3d' % e['miller_index'],
                     '%5.3f' % e['x_resid'],
                     '%5.3f' % e['y_resid'],
                     '%d' % e['panel'],
                     '%5.3f' % x_obs,
                     '%5.3f' % y_obs,
                     '%5.3f' % e['xyzobs.mm.weights'][0],
                     '%5.3f' % e['xyzobs.mm.weights'][1]])
    logger.debug(simple_table(rows, header).format())
    logger.debug("")
    return
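# five_number_summary returns (min, Q1, median, Q3, max). For readers without
# scitbx, a numpy near-equivalent (quartile conventions differ slightly
# between implementations):
import numpy as np

def five_number_summary_np(data):
    """Min, lower quartile, median, upper quartile, max of a 1-D sequence."""
    return tuple(np.percentile(data, [0, 25, 50, 75, 100]))

print(["%.4g" % v for v in five_number_summary_np([1.0, 2.0, 3.0, 4.0, 100.0])])
# ['1', '2', '3', '4', '100']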