Example #1
0
    def print_stats_on_matches(self):

        l = self.get_matches()
        nref = len(l)
        if nref == 0:
            logger.warning(
                "Unable to calculate summary statistics for zero observations")
            return

        twotheta_resid = l["2theta_resid"]
        w_2theta = l["2theta.weights"]

        msg = ("\nSummary statistics for {} observations".format(nref) +
               " matched to predictions:")
        header = ["", "Min", "Q1", "Med", "Q3", "Max"]
        rows = []
        row_data = five_number_summary(twotheta_resid)
        rows.append(["2theta_c - 2theta_o (deg)"] +
                    ["%.4g" % (e * RAD2DEG) for e in row_data])
        row_data = five_number_summary(w_2theta)
        rows.append(["2theta weights"] +
                    ["%.4g" % (e * DEG2RAD**2) for e in row_data])
        st = simple_table(rows, header)
        logger.info(msg)
        logger.info(st.format())
        logger.info("")
Example #2
0
    def run(self):
        results = [self.process(image) for image in self.images]

        header = (["Image", "Num spots"] +
                  [str(i + 1) for i, _ in enumerate(self.cmds)] + ["Best"])
        rows = []
        for im, res in zip(self.images, results):
            offset = []
            for v in res["offset_deg"]:
                if v is None:
                    offset.append("fail")
                else:
                    offset.append("{:.3f}".format(v))

            row = [
                str(e1) + ":" + e2 for e1, e2 in zip(res["nindexed"], offset)
            ]
            row = [im, str(res["nspots"])] + row

            nindexed = res["nindexed"]
            top = max(nindexed)
            best = " "
            for i, _ in enumerate(self.cmds):
                if nindexed[i] == top:
                    best += str(i + 1) + " "
            if res["nspots"] == top:
                best += "*"
            rows.append(row + [best])
        st = simple_table(rows, header)
        print(st.format())
Example #3
0
    def print_stats_on_matches(self):

        l = self.get_matches()
        nref = len(l)

        from libtbx.table_utils import simple_table
        from scitbx.math import five_number_summary
        twotheta_resid = l['2theta_resid']
        w_2theta = l['2theta.weights']

        msg = "\nSummary statistics for {0} observations".format(nref) +\
              " matched to predictions:"
        header = ["", "Min", "Q1", "Med", "Q3", "Max"]
        rows = []
        try:
            row_data = five_number_summary(twotheta_resid)
            rows.append(["2theta_c - 2theta_o (deg)"] +
                        ["%.4g" % (e * RAD2DEG) for e in row_data])
            row_data = five_number_summary(w_2theta)
            rows.append(["2theta weights"] +
                        ["%.4g" % (e * DEG2RAD**2) for e in row_data])
            st = simple_table(rows, header)
        except IndexError:
            # zero length reflection list
            logger.warning(
                "Unable to calculate summary statistics for zero observations")
            return
        logger.info(msg)
        logger.info(st.format())
        logger.info("")
Example #4
0
 def minimisation_summary(self):
     """Generate a summary of the model minimisation for output."""
     header = [
         "Intensity range (<Ih>)",
         "n_refl",
         "Uncorrected variance",
         "Corrected variance",
     ]
     rows = []
     bin_bounds = ["%.2f" % i for i in self.binning_info["bin_boundaries"]]
     for i, (initial_var, bin_var, n_refl) in enumerate(
             zip(
                 self.binning_info["initial_variances"],
                 self.binning_info["bin_variances"],
                 self.binning_info["refl_per_bin"],
             )):
         rows.append([
             bin_bounds[i] + " - " + bin_bounds[i + 1],
             str(int(n_refl)),
             str(round(initial_var, 3)),
             str(round(bin_var, 3)),
         ])
     st = simple_table(rows, header)
     return "\n".join((
         "Results of error model refinement. Uncorrected and corrected variances",
         "of normalised intensity deviations for given intensity ranges. Variances",
         "are expected to be ~1.0 for reliable errors (sigmas).",
         st.format(),
         "",
     ))
Example #5
0
  def print_stats_on_matches(self):

    l = self.get_matches()
    nref = len(l)

    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary
    twotheta_resid = l['2theta_resid']
    w_2theta = l['2theta.weights']

    msg = "\nSummary statistics for {0} observations".format(nref) +\
          " matched to predictions:"
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    try:
      row_data = five_number_summary(twotheta_resid)
      rows.append(["2theta_c - 2theta_o (deg)"] + ["%.4g" % (e * RAD2DEG) for e in row_data])
      row_data = five_number_summary(w_2theta)
      rows.append(["2theta weights"] + ["%.4g" % (e * DEG2RAD**2) for e in row_data])
      st = simple_table(rows, header)
    except IndexError:
      # zero length reflection list
      logger.warning("Unable to calculate summary statistics for zero observations")
      return
    logger.info(msg)
    logger.info(st.format())
    logger.info("")
Example #6
0
    def interpret_results(self):
        """Inspect the data in results_dict, make a nice table with the mean and
        average over many attempts and indicate the 'best' option"""
        rows = []
        headers = ["option", ""] + self.results_metadata["names"]
        monitored_values = []

        # Construct the rows, using the metadata from the results dict
        for v in self.results_dict.values():
            config_str = " ".join(v["configuration"])
            vals, stds = [], []
            for i, name in enumerate(self.results_metadata["names"]):
                val, std = self._avg_sd_from_list(v[name])
                vals.append(val)
                stds.append(std)
                if i in self.results_metadata["indices_to_monitor"]:
                    monitored_values.append(val)
            rows.append([config_str, "mean"] + [str(i) for i in vals])
            rows.append(["", "std dev"] + [str(i) for i in stds])

        # Now go through monitored values, finding the best and adding a '*'
        n_monitored = len(self.results_metadata["indices_to_monitor"])
        for i in range(n_monitored):
            vals = monitored_values[i::n_monitored]
            if self.results_metadata["best_criterion"][i] == "max":
                best_idx = vals.index(max(vals)) * 2  # *2 to skip std rows
            elif self.results_metadata["best_criterion"][i] == "min":
                best_idx = vals.index(min(vals)) * 2  # *2 to skip std rows
            rows[best_idx][self.results_metadata["indices_to_monitor"][i] +
                           2] += "*"
            # line above, 2 is to offset first two columns in table

        return simple_table(rows, headers)
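The best-option bookkeeping in interpret_results is compact but easy to misread: monitored values are appended option by option, so the slice monitored_values[i::n_monitored] recovers the series for the i-th monitored column, and the winning index is doubled because every option contributes a "mean" row followed by a "std dev" row. A toy illustration of that arithmetic, with made-up option values:

    # two options, two monitored columns (the first minimised, the second maximised)
    monitored_values = [0.25, 0.90,   # option A: mean of column 0, mean of column 1
                        0.22, 0.85]   # option B: mean of column 0, mean of column 1
    n_monitored = 2

    first_series = monitored_values[0::n_monitored]    # [0.25, 0.22]
    second_series = monitored_values[1::n_monitored]   # [0.90, 0.85]

    best_row_for_first = first_series.index(min(first_series)) * 2     # 2 -> option B's "mean" row
    best_row_for_second = second_series.index(max(second_series)) * 2  # 0 -> option A's "mean" row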
Example #7
0
def select_highly_connected_reflections(Ih_table_block,
                                        experiment,
                                        min_per_area,
                                        n_resolution_bins,
                                        print_summary=False):
    """Select highly connected reflections within a dataset, across resolutions."""
    min_per_bin = min_per_area * 12 * 1.5
    max_per_bin = min_per_area * 12 * 3.0
    assert "s1c" in Ih_table_block.Ih_table

    theta_phi_1 = calc_theta_phi(Ih_table_block.Ih_table["s1c"])
    theta = theta_phi_1.parts()[0]
    phi = theta_phi_1.parts()[1]
    Ih_table_block.Ih_table["phi"] = (phi * 180 / pi) + 180.0
    Ih_table_block.Ih_table["theta"] = theta * 180 / pi

    Ih_table_block.Ih_table = assign_segment_index(Ih_table_block.Ih_table)
    Ih_table_block.setup_binner(
        experiment.crystal.get_unit_cell(),
        experiment.crystal.get_space_group(),
        n_resolution_bins,
    )
    binner = Ih_table_block.binner

    overall_indices = flex.size_t()

    header = ["d-range", "n_refl"] + [str(i) for i in range(0, 12)]
    rows = []

    for ibin in binner.range_all():
        sel = binner.selection(ibin)
        sel_Ih_table_block = Ih_table_block.select(sel)
        indices_wrt_original = Ih_table_block.Ih_table["loc_indices"].select(
            sel)
        indices, total_in_classes = select_highly_connected_reflections_in_bin(
            sel_Ih_table_block, min_per_area, min_per_bin, max_per_bin)
        if indices:
            overall_indices.extend(indices_wrt_original.select(indices))
            d0, d1 = binner.bin_d_range(ibin)
            rows.append([
                str(round(d0, 3)) + " - " + str(round(d1, 3)),
                str(int(flex.sum(total_in_classes))),
            ] + [str(int(i)) for i in total_in_classes])
    st = simple_table(rows, header)
    msg = """\nSummary of reflection selection algorithm for this dataset:
%s resolution bins, target: at least %s reflections per area,
between %s and %s reflections per resolution bin""" % (
        n_resolution_bins,
        min_per_area,
        18 * min_per_area,
        36 * min_per_area,
    )
    if print_summary:
        logger.info(msg)
        logger.info(st.format())
    else:
        logger.debug(msg)
        logger.debug(st.format())
    return overall_indices
Example #8
0
    def print_scaling_summary(self, scaling_script):
        """Log summary information after scaling."""
        if ScalingModelObserver().data:
            logger.info(ScalingModelObserver().return_model_error_summary())
        valid_ranges = get_valid_image_ranges(scaling_script.experiments)
        image_ranges = get_image_ranges(scaling_script.experiments)
        msg = []
        for (img, valid, exp) in zip(
            image_ranges, valid_ranges, scaling_script.experiments
        ):
            if valid:
                if len(valid) > 1 or valid[0][0] != img[0] or valid[-1][1] != img[1]:
                    msg.append(
                        "Excluded images for experiment identifier: %s, image range: %s, limited range: %s"
                        % (exp.identifier, list(img), list(valid))
                    )
        if msg:
            msg = ["Summary of image ranges removed:"] + msg
            logger.info("\n".join(msg))

        # report on partiality of dataset
        partials = flex.double()
        for r in scaling_script.reflections:
            if "partiality" in r:
                partials.extend(r["partiality"])
        not_full_sel = partials < 0.99
        not_zero_sel = partials > 0.01
        gt_half = partials > 0.5
        lt_half = partials < 0.5
        partial_gt_half_sel = not_full_sel & gt_half
        partial_lt_half_sel = not_zero_sel & lt_half
        logger.info("Summary of dataset partialities")
        header = ["Partiality (p)", "n_refl"]
        rows = [
            ["all reflections", str(partials.size())],
            ["p > 0.99", str(not_full_sel.count(False))],
            ["0.5 < p < 0.99", str(partial_gt_half_sel.count(True))],
            ["0.01 < p < 0.5", str(partial_lt_half_sel.count(True))],
            ["p < 0.01", str(not_zero_sel.count(False))],
        ]
        st = simple_table(rows, header)
        logger.info(st.format())
        logger.info(
            """
Reflections below a partiality_cutoff of %s are not considered for any
part of the scaling analysis or for the reporting of merging statistics.
Additionally, if applicable, only reflections with a min_partiality > %s
were considered for use when refining the scaling model.
""",
            scaling_script.params.cut_data.partiality_cutoff,
            scaling_script.params.reflection_selection.min_partiality,
        )
        if MergingStatisticsObserver().data:
            logger.info(
                make_merging_statistics_summary(
                    MergingStatisticsObserver().data["statistics"]
                )
            )
Example #9
0
    def _show_impl(self, out):
        ss, rr, ii, zz = self.data_as_flex_arrays()
        flagged = zz > self.cutoff
        sel_ss = ss.select(flagged)
        sel_z = zz.select(flagged)
        sel_r = rr.select(flagged)
        sel_i = ii.select(flagged)
        out.show_sub_header("Relative Wilson plot")
        out.show_text("""\
The relative Wilson plot compares the mean intensity of the observed data with
the mean intensity computed from the model, as a function of resolution.  This
curve is expected to fall off at low resolution if no contribution for bulk
solvent is provided for the calculated intensities, because the presence of
bulk solvent reduces the observed intensities at low resolution by reducing
the contrast.  At high resolution, the curve should be a straight line with a
slope that reflects the difference in overall B-factor between the model and
the data.  Compared to the normal Wilson plot, the relative Wilson plot is
more linear because the influence of favored distances between atoms, caused
by bonding and secondary structure, is cancelled out.
""")
        out.show_plot(self.table)
        if (self.all_bad_z_scores):
            out.warn("""\
All resolution shells have Z-scores above %4.2f sigma.  This is indicative of
severe problems with the input data, including processing errors or ice rings.
We recommend checking the logs for data processing and inspecting the raw
images.\n""" % self.cutoff)
        else:
            out.show_text("""\
All relative wilson plot outliers above %4.2f sigma are reported.
""" % self.cutoff)
        out.newline()
        rows = []
        if len(sel_ss) > 0:
            for s, z, r, i in zip(sel_ss, sel_z, sel_r, sel_i):
                sss = math.sqrt(1.0 / s)
                rows.append(
                    ["%8.2f" % sss,
                     "%9.3e" % r,
                     "%9.3e" % i,
                     "%5.2f" % z])
            table = table_utils.simple_table(column_headers=[
                "d-spacing", "Obs. Log[ratio]", "Expected Log[ratio]",
                "Z-score"
            ],
                                             table_rows=rows)
            out.show_table(table)
        else:
            out.show(
                "The Relative wilson plot doesn't indicate any serious errors."
            )
Example #10
0
def interpret_results(results_dict):
    """Pass in a dict of results. Each item is a different attempt.
    Expect a configuration and final_rmsds columns. Score the data and make a
    nice table."""
    rows = []
    headers = ["option", "", "Rwork", "Rfree", "CCwork", "CCfree"]
    free_rmsds = []
    free_cc12s = []

    def avg_sd_from_list(lst):
        """simple function to get average and standard deviation"""
        arr = flex.double(lst)
        avg = round(flex.mean(arr), 5)
        std = round(arr.standard_deviation_of_the_sample(), 5)
        return avg, std

    for v in results_dict.values():
        config_str = " ".join(v["configuration"])
        avg_work, std_work = avg_sd_from_list(v["Rwork"])
        avg_free, std_free = avg_sd_from_list(v["Rfree"])
        avg_ccwork, std_ccwork = avg_sd_from_list(v["CCwork"])
        avg_ccfree, std_ccfree = avg_sd_from_list(v["CCfree"])
        rows.append([
            config_str,
            "mean",
            str(avg_work),
            str(avg_free),
            str(avg_ccwork),
            str(avg_ccfree),
        ])
        rows.append([
            "",
            "std dev",
            str(std_work),
            str(std_free),
            str(std_ccwork),
            str(std_ccfree),
        ])
        free_rmsds.append(avg_free)
        free_cc12s.append(avg_ccfree)
    # find lowest free rmsd
    low_rmsd_idx = free_rmsds.index(min(free_rmsds)) * 2  # *2 to skip std rows
    high_cc12_idx = free_cc12s.index(max(free_cc12s)) * 2
    rows[low_rmsd_idx][3] += "*"
    rows[high_cc12_idx][5] += "*"
    st = simple_table(rows, headers)
    logger.info("Summary of the cross validation analysis: \n")
    logger.info(st.format())
Example #11
0
    def print_stats_on_matches(self):
        """Print some basic statistics on the matches"""

        l = self.get_matches()
        nref = len(l)
        if nref == 0:
            logger.warning(
                "Unable to calculate summary statistics for zero observations"
            )
            return

        from libtbx.table_utils import simple_table
        from scitbx.math import five_number_summary

        try:
            x_resid = l["x_resid"]
            y_resid = l["y_resid"]
            delpsi = l["delpsical.rad"]
            w_x, w_y, _ = l["xyzobs.mm.weights"].parts()
            w_delpsi = l["delpsical.weights"]
        except KeyError:
            return

        header = ["", "Min", "Q1", "Med", "Q3", "Max"]
        rows = []
        row_data = five_number_summary(x_resid)
        rows.append(["Xc - Xo (mm)"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(y_resid)
        rows.append(["Yc - Yo (mm)"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(delpsi)
        rows.append(["DeltaPsi (deg)"] + ["%.4g" % (e * RAD2DEG) for e in row_data])
        row_data = five_number_summary(w_x)
        rows.append(["X weights"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(w_y)
        rows.append(["Y weights"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(w_delpsi)
        rows.append(
            ["DeltaPsi weights"] + ["%.4g" % (e * DEG2RAD ** 2) for e in row_data]
        )

        msg = (
            "\nSummary statistics for {} observations".format(nref)
            + " matched to predictions:"
        )
        logger.info(msg)
        st = simple_table(rows, header)
        logger.info(st.format())
        logger.info("")
Example #12
0
def run_analysis(flags, reflections):
    """Print a table of flags present in the reflections file"""

    header = ["flag", "nref"]
    rows = []
    for name, val in flags:
        n = (reflections.get_flags(val)).count(True)
        if n > 0:
            rows.append([name, "%d" % n])
    if rows:
        st = simple_table(rows, header)
        print(st.format())
    else:
        print("No flags set")

    return
Example #13
0
    def analysis(self, reflections):
        '''Print a table of flags present in the reflections file'''

        from libtbx.table_utils import simple_table
        header = ['flag', 'nref']
        rows = []
        for name, val in zip(self.flag_names, self.flag_values):
            n = (reflections.get_flags(val)).count(True)
            if n > 0: rows.append([name, "%d" % n])
        if len(rows) > 0:
            st = simple_table(rows, header)
            print(st.format())
        else:
            print("No flags set")

        return
Example #14
0
  def cell_param_table(crystal):
    '''Construct a table of cell parameters and their ESDs'''

    from libtbx.table_utils import simple_table
    cell = crystal.get_unit_cell().parameters()
    esd = crystal.get_cell_parameter_sd()
    vol = crystal.get_unit_cell().volume()
    vol_esd = crystal.get_cell_volume_sd()
    header = ["Parameter", "Value", "Estimated sd"]
    rows = []
    names = ["a", "b", "c", "alpha", "beta", "gamma"]
    for n, p, e in zip(names, cell, esd):
      rows.append([n, "%9.5f" % p, "%9.5f" % e])
    rows.append(["\nvolume", "\n%9.5f" % vol, "\n%9.5f" % vol_esd])
    st = simple_table(rows, header)
    return st.format()
Example #15
0
  def analysis(self, reflections):
    '''Print a table of flags present in the reflections file'''

    from libtbx.table_utils import simple_table
    header = ['flag','nref']
    rows = []
    for name, val in zip(self.flag_names, self.flag_values):
      n = (reflections.get_flags(val)).count(True)
      if n > 0: rows.append([name, "%d" % n])
    if len(rows) > 0:
      st = simple_table(rows, header)
      print(st.format())
    else:
      print("No flags set")

    return
Example #16
0
    def cell_param_table(crystal):
        '''Construct a table of cell parameters and their ESDs'''

        from libtbx.table_utils import simple_table
        cell = crystal.get_unit_cell().parameters()
        esd = crystal.get_cell_parameter_sd()
        vol = crystal.get_unit_cell().volume()
        vol_esd = crystal.get_cell_volume_sd()
        header = ["Parameter", "Value", "Estimated sd"]
        rows = []
        names = ["a", "b", "c", "alpha", "beta", "gamma"]
        for n, p, e in zip(names, cell, esd):
            rows.append([n, "%9.5f" % p, "%9.5f" % e])
        rows.append(["\nvolume", "\n%9.5f" % vol, "\n%9.5f" % vol_esd])
        st = simple_table(rows, header)
        return st.format()
Example #17
0
    def run(self):
        """Execute the script."""

        from dials.util.options import flatten_experiments
        from dials.util import Sorry
        from dials.array_family import flex

        # Parse the command line
        params, options = self.parser.parse_args(show_diff_phil=True)

        # Try to load the experiments
        if not params.input.experiments:
            print("No Experiments found in the input")
            self.parser.print_help()
            return

        experiments = flatten_experiments(params.input.experiments)
        print("{0} experiments loaded".format(len(experiments)))

        us0_vecs = self.extract_us0_vecs(experiments)
        e_vecs = self.extract_rotation_axes(experiments)

        angles = [us0.angle(e, deg=True) for us0, e in zip(us0_vecs, e_vecs)]

        fmt = "{:." + str(params.print_precision) + "f}"
        header = ["Exp\nid", "Beam direction", "Rotation axis", "Angle (deg)"]
        rows = []
        for iexp, (us0, e, ang) in enumerate(zip(us0_vecs, e_vecs, angles)):
            beam_str = " ".join([fmt] * 3).format(*us0.elems)
            e_str = " ".join([fmt] * 3).format(*e.elems)
            rows.append([str(iexp), beam_str, e_str, fmt.format(ang)])
        if len(rows) > 0:
            st = simple_table(rows, header)
            print(st.format())

        # mean and sd
        if len(rows) > 1:
            angles = flex.double(angles)
            mv = flex.mean_and_variance(angles)

            print("Mean and standard deviation of the angle")
            print(
                fmt.format(mv.mean()) + " +/- " +
                fmt.format(mv.unweighted_sample_standard_deviation()))
            print()

        return
Example #18
0
  def run(self):
    '''Execute the script.'''

    from dials.util.options import flatten_experiments
    from libtbx.utils import Sorry
    from dials.array_family import flex

    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Try to load the experiments
    if not params.input.experiments:
      print "No Experiments found in the input"
      self.parser.print_help()
      return

    experiments = flatten_experiments(params.input.experiments)
    print "{0} experiments loaded".format(len(experiments))

    us0_vecs = self.extract_us0_vecs(experiments)
    e_vecs = self.extract_rotation_axes(experiments)

    angles = [us0.angle(e, deg=True) for us0, e in zip(us0_vecs, e_vecs)]

    fmt = "{:." + str(params.print_precision) + "f}"
    header = ['Exp\nid','Beam direction', 'Rotation axis', 'Angle (deg)']
    rows = []
    for iexp, (us0, e, ang) in enumerate(zip(us0_vecs, e_vecs, angles)):
      beam_str = " ".join([fmt] * 3).format(*us0.elems)
      e_str = " ".join([fmt] * 3).format(*e.elems)
      rows.append([str(iexp), beam_str, e_str, fmt.format(ang)])
    if len(rows) > 0:
      st = simple_table(rows, header)
      print(st.format())

    # mean and sd
    if len(rows) > 1:
      angles = flex.double(angles)
      mv = flex.mean_and_variance(angles)

      print "Mean and standard deviation of the angle"
      print (fmt.format(mv.mean()) + " +/- " + fmt.format(
        mv.unweighted_sample_standard_deviation()))
      print

    return
Example #19
0
  def _show_impl (self, out) :
    ss,rr,ii,zz = self.data_as_flex_arrays()
    flagged = zz > self.cutoff
    sel_ss = ss.select(flagged)
    sel_z = zz.select(flagged)
    sel_r = rr.select(flagged)
    sel_i = ii.select(flagged)
    out.show_sub_header("Relative Wilson plot")
    out.show_text("""\
The relative Wilson plot compares the mean intensity of the observed data with
the mean intensity computed from the model, as a function of resolution.  This
curve is expected to fall off at low resolution if no contribution for bulk
solvent is provided for the calculated intensities, because the presence of
bulk solvent reduces the observed intensities at low resolution by reducing
the contrast.  At high resolution, the curve should be a straight line with a
slope that reflects the difference in overall B-factor between the model and
the data.  Compared to the normal Wilson plot, the relative Wilson plot is
more linear because the influence of favored distances between atoms, caused
by bonding and secondary structure, is cancelled out.
""")
    out.show_plot(self.table)
    if (self.all_bad_z_scores) :
      out.warn("""\
All resolution shells have Z-scores above %4.2f sigma.  This is indicative of
severe problems with the input data, including processing errors or ice rings.
We recommend checking the logs for data processing and inspecting the raw
images.\n""" % self.cutoff)
    else :
      out.show_text("""\
All relative wilson plot outliers above %4.2f sigma are reported.
""" % self.cutoff)
    out.newline()
    rows = []
    if len(sel_ss) > 0:
      for s,z,r,i in zip(sel_ss,sel_z,sel_r,sel_i):
        sss = math.sqrt(1.0/s)
        rows.append([ "%8.2f" % sss, "%9.3e" % r, "%9.3e" % i, "%5.2f" % z ])
      table = table_utils.simple_table(
        column_headers=["d-spacing", "Obs. Log[ratio]", "Expected Log[ratio]",
          "Z-score"],
        table_rows=rows)
      out.show_table(table)
    else:
      out.show("The Relative wilson plot doesn't indicate any serious errors.")
Example #20
0
    def __init__(self, scaler, use_Imid=None):
        if "intensity.prf.value" not in scaler.reflection_table:
            self.max_key = 1
            logger.info(
                "No profile intensities found, skipping profile/summation intensity combination."
            )
            return
        self.scaler = scaler
        self.experiment = scaler.experiment
        if use_Imid is not None:
            self.max_key = use_Imid
        else:
            self.Imids = scaler.params.reflection_selection.combine.Imid
            self.dataset = _make_reflection_table_from_scaler(self.scaler)
            if "partiality" in self.dataset:
                raw_intensities = (
                    self.dataset["intensity.sum.value"].as_double() /
                    self.dataset["partiality"])
            else:
                raw_intensities = self.dataset[
                    "intensity.sum.value"].as_double()
            logger.debug("length of raw intensity array: %s",
                         raw_intensities.size())
            self._determine_Imids(raw_intensities)
            header = ["Combination", "CC1/2", "Rmeas"]
            rows, results = self._test_Imid_combinations()
            st = simple_table(rows, header)
            logger.info(st.format())

            self.max_key = min(results, key=results.get)
            if self.max_key == 0:
                logger.info(
                    "Profile intensities determined to be best for scaling. \n"
                )
            elif self.max_key == 1:
                logger.info(
                    "Summation intensities determined to be best for scaling. \n"
                )
            else:
                logger.info(
                    "Combined intensities with Imid = %s determined to be best for scaling. \n",
                    self.max_key,
                )
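The decision at the end of this example reduces to picking the dictionary key with the smallest score; min(results, key=results.get) does exactly that (the attribute is called max_key in the source even though the minimum is taken). A standalone illustration with hypothetical Imid keys and scores:

    # hypothetical scores per combination: key 0 = profile, 1 = summation, otherwise an Imid value
    results = {0: 0.082, 1: 0.074, 1200: 0.071, 12000: 0.078}

    max_key = min(results, key=results.get)  # key with the lowest score -> 1200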
Example #21
0
    def print_stats_on_matches(self):
        """Print some basic statistics on the matches"""

        l = self.get_matches()
        nref = len(l)
        if nref == 0:
            logger.warning(
                "Unable to calculate summary statistics for zero observations"
            )
            return

        try:
            x_resid = l["x_resid"]
            y_resid = l["y_resid"]
            phi_resid = l["phi_resid"]
            w_x, w_y, w_phi = l["xyzobs.mm.weights"].parts()
        except KeyError:
            return

        msg = (
            "\nSummary statistics for {} observations".format(nref)
            + " matched to predictions:"
        )
        header = ["", "Min", "Q1", "Med", "Q3", "Max"]
        rows = []
        row_data = five_number_summary(x_resid)
        rows.append(["Xc - Xo (mm)"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(y_resid)
        rows.append(["Yc - Yo (mm)"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(phi_resid)
        rows.append(["Phic - Phio (deg)"] + ["%.4g" % (e * RAD2DEG) for e in row_data])
        row_data = five_number_summary(w_x)
        rows.append(["X weights"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(w_y)
        rows.append(["Y weights"] + ["%.4g" % e for e in row_data])
        row_data = five_number_summary(w_phi)
        rows.append(["Phi weights"] + ["%.4g" % (e * DEG2RAD ** 2) for e in row_data])
        st = simple_table(rows, header)

        logger.info(msg)
        logger.info(st.format())
        logger.info("")
Example #22
0
 def minimisation_summary(self):
     """Output a summary of model minimisation to the logger."""
     header = ["Intensity range (<Ih>)", "n_refl", "variance(norm_dev)"]
     rows = []
     bin_bounds = ["%.2f" % i for i in self.binning_info["bin_boundaries"]]
     for i, (bin_var, n_refl) in enumerate(
             zip(self.binning_info["bin_variances"],
                 self.binning_info["refl_per_bin"])):
         rows.append([
             bin_bounds[i] + " - " + bin_bounds[i + 1],
             str(n_refl),
             str(round(bin_var, 3)),
         ])
     st = simple_table(rows, header)
     logger.info("\n".join((
         "Intensity bins used during error model refinement:",
         st.format(),
         "variance(norm_dev) expected to be ~ 1 for each bin.",
         "",
     )))
Example #23
0
def print_step_table(refinery):
    """print useful output about refinement steps in the form of a simple table"""

    logger.info("\nRefinement steps:")

    header = ["Step", "Nref"]
    for (name, units) in zip(refinery._target.rmsd_names,
                             refinery._target.rmsd_units):
        header.append(name + "\n(" + units + ")")

    rows = []
    for i in range(refinery.history.get_nrows()):
        rmsds = [r for r in refinery.history["rmsd"][i]]
        rows.append(
            [str(i), str(refinery.history["num_reflections"][i])] +
            ["%.5g" % r for r in rmsds])

    st = simple_table(rows, header)
    logger.info(st.format())
    logger.info(refinery.history.reason_for_termination)
Example #24
0
    def print_out_of_sample_rmsd_table(self):
        """print out-of-sample RSMDs per step, if these were tracked"""

        from libtbx.table_utils import simple_table

        # check if it makes sense to proceed
        if "out_of_sample_rmsd" not in self._refinery.history:
            return
        nref = len(self.get_free_reflections())
        if nref < 10:
            return  # don't do anything if very few refs

        logger.info("\nRMSDs for out-of-sample (free) reflections:")

        rmsd_multipliers = []
        header = ["Step", "Nref"]
        for (name, units) in zip(self._target.rmsd_names,
                                 self._target.rmsd_units):
            if units == "mm":
                header.append(name + "\n(mm)")
                rmsd_multipliers.append(1.0)
            elif units == "rad":  # convert radians to degrees for reporting
                header.append(name + "\n(deg)")
                rmsd_multipliers.append(RAD2DEG)
            else:  # leave unknown units alone
                header.append(name + "\n(" + units + ")")

        rows = []
        for i in range(self._refinery.history.get_nrows()):
            rmsds = [
                r * m for r, m in zip(
                    self._refinery.history["out_of_sample_rmsd"][i],
                    rmsd_multipliers)
            ]
            rows.append([str(i), str(nref)] + ["%.5g" % e for e in rmsds])

        st = simple_table(rows, header)
        logger.info(st.format())

        return
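The unit handling above is the reusable part: one multiplier per RMSD column converts radians to degrees for reporting while millimetre columns pass through unchanged. A short sketch of that mapping, using hypothetical column names and values and the same assumed RAD2DEG constant:

    RAD2DEG = 180.0 / 3.141592653589793  # assumed, as elsewhere in these examples

    rmsd_names = ["RMSD_X", "RMSD_Y", "RMSD_Phi"]  # hypothetical column names
    rmsd_units = ["mm", "mm", "rad"]

    header = ["Step", "Nref"]
    rmsd_multipliers = []
    for name, units in zip(rmsd_names, rmsd_units):
        if units == "rad":  # report angular RMSDs in degrees
            header.append(name + "\n(deg)")
            rmsd_multipliers.append(RAD2DEG)
        else:  # leave mm (and unknown) units alone
            header.append(name + "\n(" + units + ")")
            rmsd_multipliers.append(1.0)

    raw_rmsds = [0.045, 0.052, 0.0021]  # hypothetical values in mm, mm, rad
    converted = [r * m for r, m in zip(raw_rmsds, rmsd_multipliers)]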
Example #25
0
    def run(self):
        '''Execute the script.'''

        # Parse the command line
        params, options = self.parser.parse_args(show_diff_phil=True)

        try:
            assert len(params.input.reflections) == len(params.input.datablock)
        except AssertionError:
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input datablocks")

        datablocks = flatten_datablocks(params.input.datablock)
        reflections = flatten_reflections(params.input.reflections)

        if len(reflections):
            r = self.combine_reflections(reflections)
            # print number of reflections per imageset
            from libtbx.table_utils import simple_table
            max_id = max(r['id'])
            header = ["Imageset", "Nref"]
            nrefs_per_imset = [(r['id'] == i).count(True)
                               for i in range(max_id + 1)]
            rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_imset)]
            st = simple_table(rows, header)
            print(st.format())
            rf = params.output.reflections_filename
            print('Saving combined reflections to {0}'.format(rf))
            r.as_pickle(rf)

        if len(datablocks):
            db = self.combine_datablocks(datablocks)
            dbf = params.output.datablocks_filename
            print('Saving combined datablocks to {0}'.format(dbf))
            dump = DataBlockDumper(db)
            dump.as_file(dbf, compact=params.output.compact)

        return
Example #26
0
    def __init__(self, multiscaler):
        self.active_scalers = multiscaler.active_scalers
        self.experiment = multiscaler.experiment
        self.Imids = multiscaler.params.reflection_selection.combine.Imid
        # first copy across relevant data that's needed
        self.good_datasets = []
        for i, scaler in enumerate(self.active_scalers):
            if "intensity.prf.value" in scaler.reflection_table:
                self.good_datasets.append(i)
        self.datasets = [
            _make_reflection_table_from_scaler(self.active_scalers[i])
            for i in self.good_datasets
        ]
        raw_intensities = self._get_raw_intensity_array()
        logger.debug("length of raw intensity array: %s",
                     raw_intensities.size())
        self._determine_Imids(raw_intensities)

        header = ["Combination", "CC1/2", "Rmeas"]
        rows, results = self._test_Imid_combinations()
        st = simple_table(rows, header)
        logger.info(st.format())

        self.max_key = min(results, key=results.get)
        while results[self.max_key] < 0:
            del results[self.max_key]
            self.max_key = min(results, key=results.get)
        if self.max_key == 0:
            logger.info(
                "Profile intensities determined to be best for scaling. \n")
        elif self.max_key == 1:
            logger.info(
                "Summation intensities determined to be best for scaling. \n")
        else:
            logger.info(
                "Combined intensities with Imid = %s determined to be best for scaling. \n",
                self.max_key,
            )
Example #27
0
    def print_step_table(self):
        """print useful output about refinement steps in the form of a simple table"""

        from libtbx.table_utils import simple_table

        logger.info("\nRefinement steps:")

        rmsd_multipliers = []
        header = ["Step", "Nref"]
        for (name, units) in zip(self._target.rmsd_names,
                                 self._target.rmsd_units):
            if units == "mm":
                header.append(name + "\n(mm)")
                rmsd_multipliers.append(1.0)
            elif units == "rad":  # convert radians to degrees for reporting
                header.append(name + "\n(deg)")
                rmsd_multipliers.append(RAD2DEG)
            else:  # leave unknown units alone
                header.append(name + "\n(" + units + ")")

        rows = []
        for i in range(self._refinery.history.get_nrows()):
            rmsds = [
                r * m for (r, m) in zip(self._refinery.history["rmsd"][i],
                                        rmsd_multipliers)
            ]
            rows.append(
                [str(i),
                 str(self._refinery.history["num_reflections"][i])] +
                ["%.5g" % r for r in rmsds])

        st = simple_table(rows, header)
        logger.info(st.format())
        logger.info(self._refinery.history.reason_for_termination)

        return
Example #28
0
  def run(self):

    print "Parsing input"
    params, options = self.parser.parse_args(show_diff_phil=True)

    #Configure the logging
    log.config(params.detector_phase.refinement.verbosity,
      info='dials.refine.log', debug='dials.refine.debug.log')

    # Try to obtain the models and data
    if not params.input.experiments:
      raise Sorry("No Experiments found in the input")
    if not params.input.reflections:
      raise Sorry("No reflection data found in the input")
    try:
      assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
      raise Sorry("The number of input reflections files does not match the "
        "number of input experiments")

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    from dxtbx.model.experiment.experiment_list import ExperimentList
    experiments=ExperimentList()

    if params.reference_detector == "first":
      # Use the first experiment of the first experiment list as the reference detector
      ref_exp = params.input.experiments[0].data[0]
    else:
      # Average all the detectors to generate a reference detector
      assert params.detector_phase.refinement.parameterisation.detector.hierarchy_level == 0
      from scitbx.matrix import col
      panel_fasts = []
      panel_slows = []
      panel_oris = []
      for exp_wrapper in params.input.experiments:
        exp = exp_wrapper.data[0]
        if panel_oris:
          for i, panel in enumerate(exp.detector):
            panel_fasts[i] += col(panel.get_fast_axis())
            panel_slows[i] += col(panel.get_slow_axis())
            panel_oris[i] += col(panel.get_origin())
        else:
          for i, panel in enumerate(exp.detector):
            panel_fasts.append(col(panel.get_fast_axis()))
            panel_slows.append(col(panel.get_slow_axis()))
            panel_oris.append(col(panel.get_origin()))

      ref_exp = copy.deepcopy(params.input.experiments[0].data[0])
      for i, panel in enumerate(ref_exp.detector):
        # Averaging the fast and slow axes can make them be non-orthogonal. Fix by finding
        # the vector that goes exactly between them and rotate
        # around their cross product 45 degrees from that vector in either direction
        vf = panel_fasts[i]/len(params.input.experiments)
        vs = panel_slows[i]/len(params.input.experiments)
        c = vf.cross(vs)
        angle = vf.angle(vs, deg=True)
        v45 = vf.rotate(c, angle/2, deg=True)
        vf = v45.rotate(c, -45, deg=True)
        vs = v45.rotate(c, 45, deg=True)
        panel.set_frame(vf, vs,
                        panel_oris[i]/len(params.input.experiments))

      print "Reference detector (averaged):", str(ref_exp.detector)

    # set the experiment factory that combines a crystal with the reference beam
    # and the reference detector
    experiment_from_crystal=ExperimentFromCrystal(ref_exp.beam, ref_exp.detector)

    # keep track of the number of refl per accepted experiment for a table
    nrefs_per_exp = []

    # loop through the input, building up the global lists
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
      refs = ref_wrapper.data
      exps = exp_wrapper.data

      # there might be multiple experiments already here. Loop through them
      for i, exp in enumerate(exps):

        # select the relevant reflections
        sel = refs['id'] == i
        sub_ref = refs.select(sel)

        ## DGW commented out as reflections.minimum_number_of_reflections no longer exists
        #if len(sub_ref) < params.crystals_phase.refinement.reflections.minimum_number_of_reflections:
        #  print "skipping experiment", i, "in", exp_wrapper.filename, "due to insufficient strong reflections in", ref_wrapper.filename
        #  continue

        # build an experiment with this crystal plus the reference models
        combined_exp = experiment_from_crystal(exp.crystal)

        # next experiment ID in series
        exp_id = len(experiments)

        # check this experiment
        if not check_experiment(combined_exp, sub_ref):
          print "skipping experiment", i, "in", exp_wrapper.filename, "due to poor RMSDs"
          continue

        # set reflections ID
        sub_ref['id'] = flex.int(len(sub_ref), exp_id)

        # keep number of reflections for the table
        nrefs_per_exp.append(len(sub_ref))

        # obtain mm positions on the reference detector
        sub_ref = indexer_base.map_spots_pixel_to_mm_rad(sub_ref,
          combined_exp.detector, combined_exp.scan)

        # extend refl and experiments lists
        reflections.extend(sub_ref)
        experiments.append(combined_exp)

    # print number of reflections per accepted experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Nref"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print "Number of reflections per experiment"
    print st.format()

    for cycle in range(params.n_macrocycles):

      print "MACROCYCLE %02d" % (cycle + 1)
      print "=============\n"
      # first run: multi experiment joint refinement of detector with fixed beam and
      # crystals
      print "PHASE 1"

      # SET THIS TEST TO FALSE TO REFINE WHOLE DETECTOR AS SINGLE JOB
      if params.detector_phase.refinement.parameterisation.detector.hierarchy_level > 0:
        experiments = detector_parallel_refiners(params.detector_phase, experiments, reflections)
      else:
        experiments = detector_refiner(params.detector_phase, experiments, reflections)

      # second run
      print "PHASE 2"
      experiments = crystals_refiner(params.crystals_phase, experiments, reflections)

    # Save the refined experiments to file
    output_experiments_filename = params.output.experiments_filename
    print 'Saving refined experiments to {0}'.format(output_experiments_filename)
    from dxtbx.model.experiment.experiment_list import ExperimentListDumper
    dump = ExperimentListDumper(experiments)
    dump.as_json(output_experiments_filename)

    # Write out refined reflections, if requested
    if params.output.reflections_filename:
      print('Saving refined reflections to {0}'.format(
        params.output.reflections_filename))
      reflections.as_pickle(params.output.reflections_filename)

    return
Example #29
0
  def run(self):
    '''Execute the script.'''

    from dials.util.options import flatten_experiments
    from libtbx.utils import Sorry

    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Try to load the models and data
    if len(params.input.experiments) == 0:
      print "No Experiments found in the input"
      self.parser.print_help()
      return
    if len(params.input.reflections) == 0:
      print "No reflection data found in the input"
      self.parser.print_help()
      return
    try:
      assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
      raise Sorry("The number of input reflections files does not match the "
        "number of input experiments")

    flat_exps = flatten_experiments(params.input.experiments)

    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
      try:
        ref_beam = flat_exps[ref_beam].beam
      except IndexError:
        raise Sorry("{0} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
      try:
        ref_goniometer = flat_exps[ref_goniometer].goniometer
      except IndexError:
        raise Sorry("{0} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
      try:
        ref_scan = flat_exps[ref_scan].scan
      except IndexError:
        raise Sorry("{0} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
      try:
        ref_crystal = flat_exps[ref_crystal].crystal
      except IndexError:
        raise Sorry("{0} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
      assert not params.reference_from_experiment.average_detector
      try:
        ref_detector = flat_exps[ref_detector].detector
      except IndexError:
        raise Sorry("{0} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
      # Average all of the detectors together
      from scitbx.matrix import col
      def average_detectors(target, panelgroups, depth):
        # Recursive function to do the averaging

        if params.reference_from_experiment.average_hierarchy_level is None or \
            depth == params.reference_from_experiment.average_hierarchy_level:
          n = len(panelgroups)
          sum_fast = col((0.0,0.0,0.0))
          sum_slow = col((0.0,0.0,0.0))
          sum_ori  = col((0.0,0.0,0.0))

          # Average the d matrix vectors
          for pg in panelgroups:
            sum_fast += col(pg.get_local_fast_axis())
            sum_slow += col(pg.get_local_slow_axis())
            sum_ori  += col(pg.get_local_origin())
          sum_fast /= n
          sum_slow /= n
          sum_ori  /= n

          # Re-orthogonalize the slow and the fast vectors by rotating around the cross product
          c = sum_fast.cross(sum_slow)
          a = sum_fast.angle(sum_slow, deg=True)/2
          sum_fast = sum_fast.rotate(c, a-45, deg=True)
          sum_slow = sum_slow.rotate(c, -(a-45), deg=True)

          target.set_local_frame(sum_fast,sum_slow,sum_ori)

        if target.is_group():
          # Recurse
          for i, target_pg in enumerate(target):
            average_detectors(target_pg, [pg[i] for pg in panelgroups], depth+1)

      ref_detector = flat_exps[0].detector
      average_detectors(ref_detector.hierarchy(), [e.detector.hierarchy() for e in flat_exps], 0)

    combine = CombineWithReference(beam=ref_beam, goniometer=ref_goniometer,
                  scan=ref_scan, crystal=ref_crystal, detector=ref_detector,
                  params=params)

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    from dxtbx.model.experiment.experiment_list import ExperimentList
    experiments=ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
      refs = ref_wrapper.data
      exps = exp_wrapper.data
      for i, exp in enumerate(exps):
        sel = refs['id'] == i
        sub_ref = refs.select(sel)
        nrefs_per_exp.append(len(sub_ref))
        sub_ref['id'] = flex.int(len(sub_ref), global_id)
        reflections.extend(sub_ref)
        experiments.append(combine(exp))
        global_id += 1

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Nref"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a random subset if requested
    if params.output.n_subset is not None and len(experiments) > params.output.n_subset:
      import random
      subset_exp = ExperimentList()
      subset_refls = flex.reflection_table()
      n_picked = 0
      indices = list(range(len(experiments)))
      while n_picked < params.output.n_subset:
        idx = indices.pop(random.randint(0, len(indices)-1))
        subset_exp.append(experiments[idx])
        refls = reflections.select(reflections['id'] == idx)
        refls['id'] = flex.int(len(refls), n_picked)
        subset_refls.extend(refls)
        n_picked += 1
      experiments = subset_exp
      reflections = subset_refls

    # save output
    from dxtbx.model.experiment.experiment_list import ExperimentListDumper
    print('Saving combined experiments to {0}'.format(
      params.output.experiments_filename))
    dump = ExperimentListDumper(experiments)
    dump.as_json(params.output.experiments_filename)
    print('Saving combined reflections to {0}'.format(
      params.output.reflections_filename))
    reflections.as_pickle(params.output.reflections_filename)

    return
Example #30
0
  def check_conditions(self,abs_lower_i_threshold=1e-6):
    table_labels = ('Operator',
      "# expected systematic absences",
      "<I/sigI> (violations)",
      "# expected non absences",
      "<I/sigI> (violations)",
      "# other reflections",
      "<I/sigI> (violations)",
      "Score")
    for  item in [0]: # absence_class in self.abs_check.absence_classes[ self.sg.group().crystal_system() ]:
      table_rows = []
      for condition in self.abs_check.absence_classes[
        str(sgtbx.space_group_info(
          group=self.sg.group().build_derived_reflection_intensity_group(False))\
            .as_reference_setting()
            ) ] : # crystal_system() ]:
        n_abs   = 0
        n_n_abs = 0
        n_tot   = 0
        n_abs_viol   = 0
        n_n_abs_viol = 0
        n_tot_viol   = 0

        isi_abs     = 0
        isi_n_abs   = 0
        isi_tot = 0

        i_abs     = 0
        i_n_abs   = 0
        i_tot  = 0

        score = 0

        for hkl, centric_flag, i, sigi in zip(self.miller_array.indices(), self.miller_array.centric_flags(), self.miller_array.data(), self.miller_array.sigmas() ):
          mc, cc = self.abs_check.check(condition,hkl, return_bool=True)
          if abs(i) < abs_lower_i_threshold:
            sigi=max(sigi,abs_lower_i_threshold)
          if mc: # mask checks out
            if cc: # not absent
              n_n_abs += 1
              isi_n_abs += i/sigi
              i_n_abs   += i
              # should be present. flag if not significant
              if i/sigi < self.cut:
                n_n_abs_viol += 1
              score += likelihood(i,sigi,centric_flag[1],self.sigma_inflation)
            else: #absent
              n_abs += 1
              isi_abs += i/sigi
              i_abs   += i
              # should be absent: flag if significant
              if i/sigi > self.cut:
                n_abs_viol += 1
              score += likelihood( i,sigi,None)
          else:
            n_tot +=1
            isi_tot += i/sigi
            i_tot += i
            if i/sigi <  self.cut:
              n_tot_viol += 1
        if n_abs > 0:
          isi_abs   = isi_abs/n_abs
          i_abs   = i_abs/n_abs
        if n_n_abs > 0:
          isi_n_abs = isi_n_abs/n_n_abs
          i_n_abs = i_n_abs/n_n_abs
        if n_tot > 0:
          isi_tot   = isi_tot/n_tot
          i_tot   = i_tot/n_tot

        self.n_abs.append(n_abs)
        self.n_n_abs.append(n_n_abs)
        self.n_tot.append(n_tot)
        self.n_abs_viol.append(n_abs_viol)
        self.n_n_abs_viol.append(n_n_abs_viol)
        self.n_tot_viol.append(n_tot_viol)

        self.isi_abs.append(isi_abs)
        self.isi_n_abs.append(isi_n_abs)
        self.isi_tot.append(isi_tot)

        self.i_abs.append(i_abs)
        self.i_n_abs.append(i_n_abs)
        self.i_tot.append(i_tot)

        self.op_name.append( condition )
        score = float(score)/max(1,n_abs+n_n_abs)
        self.score.append( score )

        table_rows.append( [condition,
          str("%8.0f"%(n_abs)),
          str("%8.2f  (%i, %4.1f%%)" % (isi_abs, n_abs_viol,
            100.0*float(n_abs_viol)/max(1,n_abs))),
          str("%8.0f"%(n_n_abs)),
          str("%8.2f  (%i, %4.1f%%)" % (isi_n_abs, n_n_abs_viol,
            100.0*float(n_n_abs_viol)/max(1,n_n_abs))),
          str("%8.0f"%(n_tot)),
          str("%8.2f  (%i, %4.1f%%)" % (isi_tot, n_tot_viol,
            100.0*float(n_tot_viol)/max(1,n_tot))),
          str("%8.2e"%(abs(score)))
        ])
      self.table = table_utils.simple_table(
        column_headers=table_labels,
        table_rows=table_rows)
Example #31
0
def sum_partial_reflections(reflection_table):
    """Sum partial reflections if more than one recording of a reflection present.

    This is a weighted sum for summation integration; weighted average for
    profile fitted reflections. N.B. this will report total partiality for
    the summed reflection.
    """
    nrefl = reflection_table.size()
    intensities = []
    for intensity in ["prf", "scale", "sum"]:
        if "intensity." + intensity + ".value" in reflection_table:
            intensities.append(intensity)

    isel = (reflection_table["partiality"] < 0.99).iselection()
    if not isel:
        return reflection_table

    # create map of partial_id to reflections
    delete = flex.size_t()
    partial_map = defaultdict(list)
    for j in isel:
        partial_map[reflection_table["partial_id"][j]].append(j)

    # now work through this map - get total partiality for every reflection;
    # here only consider reflections with > 1 component;
    partial_ids = []
    for p_id in partial_map:
        if len(partial_map[p_id]) > 1:
            partial_ids.append(p_id)

    header = ["Partial id", "Partiality"]
    for i in intensities:
        header.extend([str(i) + " intensity", str(i) + " variance"])
    rows = []

    # Now loop through 'matched' partials, summing and then deleting before return
    for p_id in partial_ids:
        j = partial_map[p_id]
        for i in j:
            data = [str(p_id), str(reflection_table["partiality"][i])]
            for intensity in intensities:
                data.extend(
                    [
                        str(reflection_table["intensity." + intensity + ".value"][i]),
                        str(
                            reflection_table["intensity." + intensity + ".variance"][i]
                        ),
                    ]
                )
            rows.append(data)

        # do the summing of the partiality values separately to allow looping
        # over multiple times
        total_partiality = sum([reflection_table["partiality"][i] for i in j])
        if "prf" in intensities:
            reflection_table = _sum_prf_partials(reflection_table, j)
        if "sum" in intensities:
            reflection_table = _sum_sum_partials(reflection_table, j)
        if "scale" in intensities:
            reflection_table = _sum_scale_partials(reflection_table, j)
        # FIXME now that the partials have been summed, should fractioncalc be set
        # to one (except for summation case?)
        reflection_table["partiality"][j[0]] = total_partiality
        delete.extend(flex.size_t(j[1:]))
        data = ["combined " + str(p_id), str(total_partiality)]
        for intensity in intensities:
            data.extend(
                [
                    str(reflection_table["intensity." + intensity + ".value"][j[0]]),
                    str(reflection_table["intensity." + intensity + ".variance"][j[0]]),
                ]
            )
        rows.append(data)
    reflection_table.del_selected(delete)
    if nrefl > reflection_table.size():
        logger.info(
            "Combined %s partial reflections with other partial reflections"
            % (nrefl - reflection_table.size())
        )
    logger.debug("\nSummary of combination of partial reflections")
    st = simple_table(rows, header)
    logger.debug(st.format())
    return reflection_table
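The helpers _sum_prf_partials, _sum_sum_partials and _sum_scale_partials are referenced above but not shown. As a rough, self-contained sketch of the two combination rules described in the docstring (a plain sum for summation-integrated intensities, an inverse-variance weighted mean for profile-fitted ones), with made-up numbers and hypothetical helper names, not the actual DIALS implementation:

# Hedged sketch: combine two partial recordings of the same reflection.
def combine_sum_partials(intensities, variances):
    # summation integration: intensities and variances simply add
    return sum(intensities), sum(variances)

def combine_prf_partials(intensities, variances):
    # profile fitting: inverse-variance weighted mean
    weights = [1.0 / v for v in variances]
    wsum = sum(weights)
    value = sum(w * i for w, i in zip(weights, intensities)) / wsum
    return value, 1.0 / wsum

parts_i = [120.0, 80.0]  # two partials of one reflection (illustrative)
parts_v = [30.0, 25.0]
print(combine_sum_partials(parts_i, parts_v))  # (200.0, 55.0)
print(combine_prf_partials(parts_i, parts_v))  # weighted mean and its variance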
Example #32
0
    def __call__(self, reflections):
        """Identify outliers in the input and set the centroid_outlier flag.
    Return True if any outliers were detected, otherwise False"""

        if self._verbosity > 0:
            logger.info("Detecting centroid outliers using the {0} algorithm".format(type(self).__name__))

        # check the columns are present
        for col in self._cols:
            assert col in reflections

        sel = reflections.get_flags(reflections.flags.used_in_refinement)
        all_data = reflections.select(sel)
        all_data_indices = sel.iselection()
        nexp = flex.max(all_data["id"]) + 1

        jobs = []
        if self._separate_experiments:
            # split the data set by experiment id
            for iexp in xrange(nexp):
                sel = all_data["id"] == iexp
                job = {
                    "id": iexp,
                    "panel": "all",
                    "data": all_data.select(sel),
                    "indices": all_data_indices.select(sel),
                }
                jobs.append(job)
        else:
            # keep the whole dataset across all experiment ids
            job = {"id": "all", "panel": "all", "data": all_data, "indices": all_data_indices}
            jobs.append(job)

        jobs2 = []
        if self._separate_panels:
            # split further by panel id
            for job in jobs:
                data = job["data"]
                iexp = job["id"]
                indices = job["indices"]
                for ipanel in xrange(flex.max(data["panel"]) + 1):
                    sel = data["panel"] == ipanel
                    job = {"id": iexp, "panel": ipanel, "data": data.select(sel), "indices": indices.select(sel)}
                    jobs2.append(job)
        else:
            # keep the splits as they are
            jobs2 = jobs

        jobs3 = []
        if self.get_block_width() is not None:
            # split into equal-sized phi ranges
            for job in jobs2:
                data = job["data"]
                iexp = job["id"]
                ipanel = job["panel"]
                indices = job["indices"]
                phi = data["xyzobs.mm.value"].parts()[2]
                if len(phi) == 0:  # detect no data in the job
                    jobs3.append(job)
                    continue
                phi_low = flex.min(phi)
                phi_range = flex.max(phi) - phi_low
                if phi_range == 0.0:  # detect stills and do not split
                    jobs3.append(job)
                    continue
                bw = self.get_block_width(iexp)
                if bw is None:  # detect no split for this experiment
                    jobs3.append(job)
                    continue
                nblocks = int(round(RAD2DEG * phi_range / bw))
                nblocks = max(1, nblocks)
                real_width = phi_range / nblocks
                block_end = 0.0
                for iblock in xrange(nblocks - 1):  # all except the last block
                    block_start = iblock * real_width
                    block_end = (iblock + 1) * real_width
                    sel = (phi >= (phi_low + block_start)) & (phi < (phi_low + block_end))
                    job = {
                        "id": iexp,
                        "panel": ipanel,
                        "data": data.select(sel),
                        "indices": indices.select(sel),
                        "phi_start": RAD2DEG * (phi_low + block_start),
                        "phi_end": RAD2DEG * (phi_low + block_end),
                    }
                    jobs3.append(job)
                # now last block
                sel = phi >= (phi_low + block_end)
                job = {
                    "id": iexp,
                    "panel": ipanel,
                    "data": data.select(sel),
                    "indices": indices.select(sel),
                    "phi_start": RAD2DEG * (phi_low + block_end),
                    "phi_end": RAD2DEG * (phi_low + phi_range),
                }
                jobs3.append(job)
        else:
            # keep the splits as they are
            jobs3 = jobs2

        # Work out the format of the jobs table
        if self._verbosity > 0:
            header = ["Job"]
            if self._separate_experiments:
                header.append("Exp\nid")
            if self._separate_panels:
                header.append("Panel\nid")
            if self.get_block_width() is not None:
                header.append("Block range\n(deg)")
            header.extend(["Nref", "Nout", "%out"])
            rows = []

        # now loop over the lowest level of splits
        for i, job in enumerate(jobs3):

            data = job["data"]
            indices = job["indices"]
            iexp = job["id"]
            ipanel = job["panel"]
            nref = len(indices)

            if nref >= self._min_num_obs:

                # get the subset of data as a list of columns
                cols = [data[col] for col in self._cols]

                # determine the position of outliers on this sub-dataset
                outliers = self._detect_outliers(cols)

                # get positions of outliers from the original matches
                ioutliers = indices.select(outliers)

            elif nref > 0:
                # too few reflections in the job
                msg = "For job {0}, fewer than {1} reflections are present.".format(i + 1, self._min_num_obs)
                msg += " All reflections flagged as possible outliers."
                if self._verbosity > 0:
                    logger.debug(msg)
                ioutliers = indices

            else:
                # no reflections in the job
                ioutliers = indices

            # set the centroid_outlier flag in the original reflection table
            nout = len(ioutliers)
            if nout > 0:
                reflections.set_flags(ioutliers, reflections.flags.centroid_outlier)
                self.nreject += nout

            # Add job data to the table
            if self._verbosity > 0:
                row = [str(i + 1)]
                if self._separate_experiments:
                    row.append(str(iexp))
                if self._separate_panels:
                    row.append(str(ipanel))
                if self.get_block_width() is not None:
                    try:
                        row.append("{phi_start:.2f} - {phi_end:.2f}".format(**job))
                    except KeyError:
                        row.append("{0:.2f} - {1:.2f}".format(0.0, 0.0))
                if nref == 0:
                    p100 = 0
                else:
                    p100 = nout / nref * 100.0
                    if p100 > 30.0:
                        msg = (
                            "{0:3.1f}% of reflections were flagged as outliers"
                            " from job {1}"
                        ).format(p100, i + 1)
                        logger.warning(msg)
                row.extend([str(nref), str(nout), "%3.1f" % p100])
                rows.append(row)

        if self.nreject == 0:
            return False
        if self._verbosity > 0:
            logger.info("{0} reflections have been flagged as outliers".format(self.nreject))
            logger.debug("Outlier rejections per job:")
            st = simple_table(rows, header)
            logger.debug(st.format())

        return True
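For reference, the phi-block splitting above (nblocks derived from the requested block width, then equal-sized blocks of real_width) can be followed with a tiny standalone sketch using made-up scan values; this only illustrates the arithmetic, not the flex-based selection:

import math

RAD2DEG = 180.0 / math.pi

phi_low = 0.0                    # radians (illustrative)
phi_range = math.radians(90.0)   # a 90 degree scan
block_width_deg = 30.0           # requested block width

nblocks = max(1, int(round(RAD2DEG * phi_range / block_width_deg)))
real_width = phi_range / nblocks
for iblock in range(nblocks):
    start = phi_low + iblock * real_width
    end = phi_low + (iblock + 1) * real_width
    print("block %d: %.2f - %.2f deg" % (iblock + 1, RAD2DEG * start, RAD2DEG * end))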
Example #33
0
    def print_exp_rmsd_table(self):
        """print useful output about refinement steps in the form of a simple table"""

        from libtbx.table_utils import simple_table

        logger.info("\nRMSDs by experiment:")

        header = ["Exp\nid", "Nref"]
        for (name, units) in zip(self._target.rmsd_names,
                                 self._target.rmsd_units):
            if (name == "RMSD_X" or name == "RMSD_Y") and units == "mm":
                header.append(name + "\n(px)")
            elif name == "RMSD_Phi" and units == "rad":
                # will convert radians to images for reporting of scans
                header.append("RMSD_Z" + "\n(images)")
            elif units == "rad":
                # will convert other angles in radians to degrees (e.g. for
                # RMSD_DeltaPsi and RMSD_2theta)
                header.append(name + "\n(deg)")
            else:  # skip other/unknown RMSDs
                pass

        rows = []
        for iexp, exp in enumerate(self._experiments):
            detector = exp.detector
            px_sizes = [p.get_pixel_size() for p in detector]
            it = iter(px_sizes)
            px_size = next(it)
            if not all(tst == px_size for tst in it):
                logger.info(
                    "The detector in experiment %d does not have the same pixel "
                    + "sizes on each panel. Skipping...",
                    iexp,
                )
                continue
            px_per_mm = [1.0 / e for e in px_size]

            scan = exp.scan
            try:
                images_per_rad = 1.0 / abs(scan.get_oscillation(deg=False)[1])
            except (AttributeError, ZeroDivisionError):
                images_per_rad = None

            raw_rmsds = self._target.rmsds_for_experiment(iexp)
            if raw_rmsds is None:
                continue  # skip experiments where rmsd cannot be calculated
            num = self._target.get_num_matches_for_experiment(iexp)
            rmsds = []
            for (name, units, rmsd) in zip(self._target.rmsd_names,
                                           self._target.rmsd_units, raw_rmsds):
                if name == "RMSD_X" and units == "mm":
                    rmsds.append(rmsd * px_per_mm[0])
                elif name == "RMSD_Y" and units == "mm":
                    rmsds.append(rmsd * px_per_mm[1])
                elif name == "RMSD_Phi" and units == "rad":
                    rmsds.append(rmsd * images_per_rad)
                elif units == "rad":
                    rmsds.append(rmsd * RAD2DEG)
            rows.append([str(iexp), str(num)] + ["%.5g" % r for r in rmsds])

        if len(rows) > 0:
            st = simple_table(rows, header)
            logger.info(st.format())

        return
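The conversions in this table (mm to pixels via the inverse pixel size, radians to images via the inverse oscillation width, radians to degrees via RAD2DEG) can be checked with a small standalone sketch; the detector and scan values below are invented for illustration:

import math

RAD2DEG = 180.0 / math.pi

pixel_size_mm = (0.172, 0.172)   # fast, slow (illustrative)
oscillation_width_deg = 0.1      # per image (illustrative)

px_per_mm = [1.0 / e for e in pixel_size_mm]
images_per_rad = 1.0 / math.radians(oscillation_width_deg)

rmsd_x_mm, rmsd_y_mm, rmsd_phi_rad = 0.05, 0.06, 0.0005
print("RMSD_X (px):     %.5g" % (rmsd_x_mm * px_per_mm[0]))
print("RMSD_Y (px):     %.5g" % (rmsd_y_mm * px_per_mm[1]))
print("RMSD_Z (images): %.5g" % (rmsd_phi_rad * images_per_rad))
print("RMSD_Z (deg):    %.5g" % (rmsd_phi_rad * RAD2DEG))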
Example #34
0
    def run(self):

        print("Parsing input")
        params, options = self.parser.parse_args(show_diff_phil=True)

        #Configure the logging
        log.config(params.detector_phase.refinement.verbosity,
                   info='dials.refine.log',
                   debug='dials.refine.debug.log')

        # Try to obtain the models and data
        if not params.input.experiments:
            raise Sorry("No Experiments found in the input")
        if not params.input.reflections:
            raise Sorry("No reflection data found in the input")
        try:
            assert len(params.input.reflections) == len(
                params.input.experiments)
        except AssertionError:
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        # set up global experiments and reflections lists
        from dials.array_family import flex
        reflections = flex.reflection_table()
        global_id = 0
        from dxtbx.model.experiment_list import ExperimentList
        experiments = ExperimentList()

        if params.reference_detector == "first":
            # Use the first experiment of the first experiment list as the reference detector
            ref_exp = params.input.experiments[0].data[0]
        else:
            # Average all the detectors to generate a reference detector
            assert params.detector_phase.refinement.parameterisation.detector.hierarchy_level == 0
            from scitbx.matrix import col
            panel_fasts = []
            panel_slows = []
            panel_oris = []
            for exp_wrapper in params.input.experiments:
                exp = exp_wrapper.data[0]
                if panel_oris:
                    for i, panel in enumerate(exp.detector):
                        panel_fasts[i] += col(panel.get_fast_axis())
                        panel_slows[i] += col(panel.get_slow_axis())
                        panel_oris[i] += col(panel.get_origin())
                else:
                    for i, panel in enumerate(exp.detector):
                        panel_fasts.append(col(panel.get_fast_axis()))
                        panel_slows.append(col(panel.get_slow_axis()))
                        panel_oris.append(col(panel.get_origin()))

            ref_exp = copy.deepcopy(params.input.experiments[0].data[0])
            for i, panel in enumerate(ref_exp.detector):
                # Averaging the fast and slow axes can make them non-orthogonal. Fix by
                # finding the vector exactly between them and rotating 45 degrees from
                # that vector in either direction, about their cross product
                vf = panel_fasts[i] / len(params.input.experiments)
                vs = panel_slows[i] / len(params.input.experiments)
                c = vf.cross(vs)
                angle = vf.angle(vs, deg=True)
                v45 = vf.rotate(c, angle / 2, deg=True)
                vf = v45.rotate(c, -45, deg=True)
                vs = v45.rotate(c, 45, deg=True)
                panel.set_frame(vf, vs,
                                panel_oris[i] / len(params.input.experiments))

            print("Reference detector (averaged):", str(ref_exp.detector))

        # set the experiment factory that combines a crystal with the reference beam
        # and the reference detector
        experiment_from_crystal = ExperimentFromCrystal(
            ref_exp.beam, ref_exp.detector)

        # keep track of the number of refl per accepted experiment for a table
        nrefs_per_exp = []

        # loop through the input, building up the global lists
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data

            # there might be multiple experiments already here. Loop through them
            for i, exp in enumerate(exps):

                # select the relevant reflections
                sel = refs['id'] == i
                sub_ref = refs.select(sel)

                ## DGW commented out as reflections.minimum_number_of_reflections no longer exists
                #if len(sub_ref) < params.crystals_phase.refinement.reflections.minimum_number_of_reflections:
                #  print "skipping experiment", i, "in", exp_wrapper.filename, "due to insufficient strong reflections in", ref_wrapper.filename
                #  continue

                # build an experiment with this crystal plus the reference models
                combined_exp = experiment_from_crystal(exp.crystal)

                # next experiment ID in series
                exp_id = len(experiments)

                # check this experiment
                if not check_experiment(combined_exp, sub_ref):
                    print("skipping experiment", i, "in", exp_wrapper.filename,
                          "due to poor RMSDs")
                    continue

                # set reflections ID
                sub_ref['id'] = flex.int(len(sub_ref), exp_id)

                # keep number of reflections for the table
                nrefs_per_exp.append(len(sub_ref))

                # obtain mm positions on the reference detector
                sub_ref = indexer_base.map_spots_pixel_to_mm_rad(
                    sub_ref, combined_exp.detector, combined_exp.scan)

                # extend refl and experiments lists
                reflections.extend(sub_ref)
                experiments.append(combined_exp)

        # print number of reflections per accepted experiment
        from libtbx.table_utils import simple_table
        header = ["Experiment", "Nref"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print("Number of reflections per experiment")
        print(st.format())

        for cycle in range(params.n_macrocycles):

            print("MACROCYCLE %02d" % (cycle + 1))
            print("=============\n")
            # first run: multi experiment joint refinement of detector with fixed beam and
            # crystals
            print("PHASE 1")

            # SET THIS TEST TO FALSE TO REFINE WHOLE DETECTOR AS SINGLE JOB
            if params.detector_phase.refinement.parameterisation.detector.hierarchy_level > 0:
                experiments = detector_parallel_refiners(
                    params.detector_phase, experiments, reflections)
            else:
                experiments = detector_refiner(params.detector_phase,
                                               experiments, reflections)

            # second run
            print("PHASE 2")
            experiments = crystals_refiner(params.crystals_phase, experiments,
                                           reflections)

        # Save the refined experiments to file
        output_experiments_filename = params.output.experiments_filename
        print('Saving refined experiments to {0}'.format(
            output_experiments_filename))
        from dxtbx.model.experiment_list import ExperimentListDumper
        dump = ExperimentListDumper(experiments)
        dump.as_json(output_experiments_filename)

        # Write out refined reflections, if requested
        if params.output.reflections_filename:
            print('Saving refined reflections to {0}'.format(
                params.output.reflections_filename))
            reflections.as_pickle(params.output.reflections_filename)

        return
Example #35
0
  def _show_impl (self, out) :
    out.show_header("SAD experiment planning")
    out.show_sub_header(
      "Dataset overall I/sigma required to solve a structure")

    self.show_characteristics(out=out)

    out.show_preformatted_text("""
-------Targets for entire dataset-------  ----------Likely outcome-----------""")

    if (len(self.table_rows) == 0) :
      out.show_text("SAD solution unlikely with the given parameters.")
      return
    if (not out.gui_output) :
      out.show_preformatted_text("""
                              Anomalous    Useful    Useful
                            Half-dataset  Anom CC   Anomalous
 Dmin   N     I/sigI sigF/F     CC       (cc*_anom)  Signal   P(Substr)   FOM
                      (%)                                        (%)
""")
      for row in self.table_rows :
        out.show_preformatted_text(
        "%s%s%s%s     %s       %s      %s        %s       %s" %
          tuple(row))
    else :
      table = table_utils.simple_table(
        table_rows=self.table_rows,
        column_headers=["d_min", "N", "I/sigI", "sigF/F (%)",
          "Half-dataset CC_ano", "CC*_ano", "Anom. signal","P(Substr)","FOM"])
      out.show_table(table)
    (dmin,nsites,nrefl,fpp,i_over_sigma,sigf,cc_half_weak,cc_half,cc_ano_weak,
      cc_ano,s_ano,solved,fom) = tuple(self.representative_values)

    if self.missed_target_resolutions:
      self.missed_target_resolutions.sort()
      extra_note=""
      if self.used_max_i_over_sigma:
        extra_note="I/sigma shown is value of max_i_over_sigma."
      elif not self.input_i_over_sigma:
        extra_note="I/sigma shown achieves about %3.0f%% of \nmaximum anomalous signal." %(self.ratio_for_failure*100.)
      out.show_text("""
Note: Target anomalous signal not achievable with tested I/sigma (up to %d )
for resolutions of %5.2f A and lower. %s
""" % (int(self.max_i_over_sigma),self.missed_target_resolutions[0],extra_note))

    if self.skipped_resolutions:
      self.skipped_resolutions.sort()
      out.show_text("""
Note: No plausible values of I/sigma found for resolutions of %5.2f A
and lower.
""" % (self.skipped_resolutions[0]))


    out.show_text("""
This table says that if you collect your data to a resolution of %5.1f A with
an overall <I>/<sigma> of about %3.0f then the half-dataset anomalous
correlation should be about %5.2f (typically within a factor of 2).  This
should lead to a correlation of your anomalous data to true anomalous
differences (CC*_ano) of about %5.2f, and a useful anomalous signal around
%3.0f (again within a factor of about two). With this value of estimated
anomalous signal the probability of finding the anomalous substructure is
about %3d%% (based on estimated anomalous signal and actual outcomes for
real structures), and the estimated figure of merit of phasing is %3.2f.""" % (dmin, i_over_sigma,  cc_half,  cc_ano,
        s_ano, int(solved), fom))
    out.show_text("""
The value of sigF/F (actually rms(sigF)/rms(F)) is approximately the inverse
of I/sigma. The calculations are based on rms(sigF)/rms(F).

Note that these values assume data measured with little radiation damage or at
least with anomalous pairs measured close in time. The values also assume that
the anomalously-scattering atoms are nearly as well-ordered as other atoms.
If your crystal does not fit these assumptions it may be necessary to collect
data with even higher I/sigma than indicated here.

Note also that anomalous signal is roughly proportional to the anomalous
structure factors at a given resolution. That means that if you have 50%
occupancy of your anomalous atoms, the signal will be 50% of what it otherwise
would be.  Also it means that if your anomalously scattering atoms only
contribute to 5 A, you should only consider data to 5 A in this analysis.
""")
    out.show_paragraph_header("""What to do next:""")
    out.show_text("""
1. Collect your data, trying to obtain a value of I/sigma for the whole dataset
   at least as high as your target.""")
    out.show_text("""\
2. Scale and analyze your unmerged data with phenix.scale_and_merge to get
   accurate scaled and merged data as well as two half-dataset data files
   that can be used to estimate the quality of your data.""")
    out.show_text("""\
3. Analyze your anomalous data (the scaled merged data and the two half-dataset
   data files) with phenix.anomalous_signal to estimate the anomalous signal
   in your data. This tool will again guess the fraction of the substructure
   that can be obtained with your data, this time with knowledge of the
   actual anomalous signal.  It will also estimate the figure of merit of
   phasing that you can obtain once you solve the substructure. """)
    out.show_text("""\
4. Compare the anomalous signal in your measured data with the
   estimated values in the table above. If they are lower than expected
   you may need to collect more data to obtain the target anomalous signal.""")
Example #36
0
  def _show_impl (self, out) :
    assert (self.b_cart is not None)
    out.show_sub_header("Maximum likelihood anisotropic Wilson scaling")
    out.show("ML estimate of overall B_cart value:")
    out.show_preformatted_text("""\
  %5.2f, %5.2f, %5.2f
  %12.2f, %5.2f
  %19.2f
""" % (self.b_cart[0], self.b_cart[3], self.b_cart[4],
                       self.b_cart[1], self.b_cart[5],
                                       self.b_cart[2]))
    out.show("Equivalent representation as U_cif:")
    out.show_preformatted_text("""\
  %5.2f, %5.2f, %5.2f
  %12.2f, %5.2f
  %19.2f
""" % (self.u_cif[0], self.u_cif[3], self.u_cif[4],
                      self.u_cif[1], self.u_cif[5],
                                     self.u_cif[2]))
    out.show("Eigen analyses of B-cart:")
    def format_it (x,format="%3.2f"):
      xx = format%(x)
      if x > 0:
        xx = " "+xx
      return(xx)
    rows = [
      [ "1", format_it(self.eigen_values[0],"%5.3f"),
       "(%s, %s, %s)" % (format_it(self.eigen_vectors[0]),
       format_it(self.eigen_vectors[1]),
       format_it(self.eigen_vectors[2])) ],
      [ "2", format_it(self.eigen_values[1],"%5.3f"),
       "(%s, %s, %s)" % (format_it(self.eigen_vectors[3]),
       format_it(self.eigen_vectors[4]),
       format_it(self.eigen_vectors[5])) ],
      [ "3", format_it(self.eigen_values[2],"%5.3f"),
       "(%s, %s, %s)" % (format_it(self.eigen_vectors[6]),
       format_it(self.eigen_vectors[7]),
       format_it(self.eigen_vectors[8])) ],
    ]
    table = table_utils.simple_table(
      column_headers=["Eigenvector", "Value", "Vector"],
      table_rows=rows)
    out.show_table(table)
    out.show("ML estimate of  -log of scale factor:")
    out.show_preformatted_text("  %5.2f" %(self.p_scale))
    out.show_sub_header("Anisotropy analyses")
    if (self.eigen_values[0] == 0) :
      raise Sorry("Fatal error: eigenvector 1 of the overall anisotropic "+
        "B-factor B_cart is zero.  This "+
        "may indicate severe problems with the input data, for instance "+
        "if only a single plane through reciprocal space is present.")
#    ani_rat_p = self.aniso_ratio_p_value(self.anirat)
#    if ani_rat_p < 0:
#      ani_rat_p = 0.0
#    out.show_preformatted_text("""\
#Anisotropy    ( [MaxAnisoB-MinAnisoB]/[MaxAnisoB] ) :  %7.3e
#                          Anisotropic ratio p-value :  %7.3e
#""" % (self.anirat, ani_rat_p))
#    out.show("""
# The p-value is a measure of the severity of anisotropy as observed in the PDB.
# The p-value of %5.3e indicates that roughly %4.1f %% of datasets available in
# the PDB have an anisotropy equal to or worse than this dataset.""" %
#      (ani_rat_p, 100.0*math.exp(-ani_rat_p)))
    message = """indicates that there probably is no
 significant systematic noise amplification."""
    if (self.z_tot is not None) and (self.z_tot > self.z_level) :
      if self.mean_isigi_high_correction_factor < self.level:
        message =  """indicates that there probably is significant
 systematic noise amplification that could possibly lead to artefacts in the
 maps or difficulties in refinement"""
      else:
        message =  """indicates that there probably is some
 systematic dependence between the anisotropy and not-so-well-defined
 intensities. Because the signal to noise for the most affected intensities
 is relatively good, the effect on maps or refinement behavior is most likely
 not very serious."""
    if (self.mean_count is not None) :
      out.show("""
 For the resolution shell spanning between %4.2f - %4.2f Angstrom,
 the mean I/sigI is equal to %5.2f. %4.1f %% of these intensities have
 an I/sigI > 3. When sorting these intensities by their anisotropic
 correction factor and analysing the I/sigI behavior for this ordered
 list, we can gauge the presence of 'anisotropy induced noise amplification'
 in reciprocal space.
""" % (self.max_d, self.min_d, self.mean_isigi, 100.0*self.mean_count))
      out.show("""\
 The quarter of Intensities *least* affected by the anisotropy correction show
""")
      out.show_preformatted_text("""\
    <I/sigI>                 :   %5.2e
    Fraction of I/sigI > 3   :   %5.2e     ( Z = %8.2f )""" %
        (self.mean_isigi_low_correction_factor,
        self.frac_below_low_correction,
        self.z_low))
      out.show("""\
  The quarter of Intensities *most* affected by the anisotropy correction show
""")
      out.show_preformatted_text("""\
    <I/sigI>                 :   %5.2e
    Fraction of I/sigI > 3   :   %5.2e     ( Z = %8.2f )""" %
        (self.mean_isigi_high_correction_factor,
         self.frac_below_high_correction,
         self.z_high))
      #out.show(""" The combined Z-score of %8.2f %s""" % (self.z_tot,
      #  message))
      out.show("""\
 Z-scores are computed on the basis of a Bernoulli model assuming independence
 of weak reflections with respect to anisotropy.""")
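The preformatted blocks above print only the upper triangle of B_cart and U_cif; from the index pattern used, the six components appear to be ordered (B11, B22, B33, B12, B13, B23). A minimal sketch (an assumption inferred from that pattern, not part of the original code) that expands such a vector to the full symmetric 3x3 matrix:

def as_symmetric_3x3(b):
    # b assumed ordered (B11, B22, B33, B12, B13, B23), matching the
    # index pattern of the preformatted output above
    b11, b22, b33, b12, b13, b23 = b
    return [[b11, b12, b13],
            [b12, b22, b23],
            [b13, b23, b33]]

print(as_symmetric_3x3([5.10, 4.80, 6.00, -0.20, 0.10, 0.30]))  # illustrative values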
Example #37
0
  def __call__(self, reflections):
    """Identify outliers in the input and set the centroid_outlier flag.
    Return True if any outliers were detected, otherwise False"""

    # check the columns are present
    for col in self._cols: assert reflections.has_key(col)

    sel = reflections.get_flags(reflections.flags.used_in_refinement)
    all_data = reflections.select(sel)
    all_data_indices = sel.iselection()
    nexp = flex.max(all_data['id']) + 1

    jobs = []
    if self._separate_experiments:
      # split the data set by experiment id
      for iexp in xrange(nexp):
        sel = all_data['id'] == iexp
        job = {'id':iexp, 'panel':'all', 'data':all_data.select(sel),
               'indices':all_data_indices.select(sel)}
        jobs.append(job)
    else:
      # keep the whole dataset across all experiment ids
      job = {'id':'all', 'panel':'all', 'data':all_data,
             'indices':all_data_indices}
      jobs.append(job)

    jobs2 = []
    if self._separate_panels:
      # split further by panel id
      for job in jobs:
        data = job['data']
        iexp = job['id']
        indices = job['indices']
        for ipanel in xrange(flex.max(data['panel']) + 1):
          sel = data['panel'] == ipanel
          job2 = {'id':iexp, 'panel':ipanel, 'data':data.select(sel),
                  'indices':indices.select(sel)}
          jobs2.append(job2)
    else:
      # keep the splits as they are
      jobs2 = jobs

    # now loop over the lowest level of splits
    for job in jobs2:

      data = job['data']
      indices = job['indices']
      iexp = job['id']
      ipanel = job['panel']

      if len(indices) >= self._min_num_obs:

        # get the subset of data as a list of columns
        cols = [data[col] for col in self._cols]

        # determine the position of outliers on this sub-dataset
        outliers = self._detect_outliers(cols)

        # get positions of outliers from the original matches
        ioutliers = indices.select(outliers)

      else:
        msg = "For experiment: {0} and panel: {1}, ".format(iexp, ipanel)
        msg += "only {0} reflections are present. ".format(len(indices))
        msg += "All of these flagged as possible outliers."
        if self._verbosity > 0: debug(msg)
        ioutliers = indices

      # set those reflections as outliers in the original reflection table
      reflections.set_flags(ioutliers,
        reflections.flags.centroid_outlier)

      self.nreject += len(ioutliers)

    if self.nreject == 0: return False

    if self._verbosity > 0:
      info("{0} reflections have been flagged as outliers".format(self.nreject))

    if nexp > 1 and self._verbosity > 0:
      # table of rejections per experiment
      from libtbx.table_utils import simple_table
      header = ["Exp\nid", "Nref", "Nout", "%out"]
      rows = []
      outlier_sel = reflections.get_flags(reflections.flags.centroid_outlier)
      outliers = reflections.select(outlier_sel)
      for iexp in xrange(nexp):
        nref = (reflections['id'] == iexp).count(True)
        nout = (outliers['id'] == iexp).count(True)
        if nref == 0:
          p100 = 0
          msg = ("No reflections associated with"
                 " Experiment with id {0}").format(iexp)
          warning(msg)
        else:
          p100 = nout / nref * 100.0
          if p100 > 30.0:
            msg = ("{0:3.1f}% of reflections were flagged as outliers from the"
                   " Experiment with id {1}").format(p100, iexp)
            warning(msg)
        rows.append(["%d" % iexp, "%d" % nref, "%d" % nout, "%3.1f" % p100])
      st = simple_table(rows, header)
      debug("Outlier rejections per experiment:")
      debug(st.format())

    return True
Example #38
0
    def run_with_preparsed(self, params, options):
        """Run combine_experiments, but allow passing in of parameters"""
        from dials.util.options import flatten_experiments

        # Try to load the models and data
        if len(params.input.experiments) == 0:
            print("No Experiments found in the input")
            self.parser.print_help()
            return
        if len(params.input.reflections) == 0:
            print("No reflection data found in the input")
            self.parser.print_help()
            return
        try:
            assert len(params.input.reflections) == len(
                params.input.experiments)
        except AssertionError:
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        flat_exps = flatten_experiments(params.input.experiments)

        ref_beam = params.reference_from_experiment.beam
        ref_goniometer = params.reference_from_experiment.goniometer
        ref_scan = params.reference_from_experiment.scan
        ref_crystal = params.reference_from_experiment.crystal
        ref_detector = params.reference_from_experiment.detector

        if ref_beam is not None:
            try:
                ref_beam = flat_exps[ref_beam].beam
            except IndexError:
                raise Sorry("{} is not a valid experiment ID".format(ref_beam))

        if ref_goniometer is not None:
            try:
                ref_goniometer = flat_exps[ref_goniometer].goniometer
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_goniometer))

        if ref_scan is not None:
            try:
                ref_scan = flat_exps[ref_scan].scan
            except IndexError:
                raise Sorry("{} is not a valid experiment ID".format(ref_scan))

        if ref_crystal is not None:
            try:
                ref_crystal = flat_exps[ref_crystal].crystal
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_crystal))

        if ref_detector is not None:
            assert not params.reference_from_experiment.average_detector
            try:
                ref_detector = flat_exps[ref_detector].detector
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_detector))
        elif params.reference_from_experiment.average_detector:
            # Average all of the detectors together
            from scitbx.matrix import col

            def average_detectors(target, panelgroups, depth):
                # Recursive function to do the averaging

                if (params.reference_from_experiment.average_hierarchy_level is
                        None or depth == params.reference_from_experiment.
                        average_hierarchy_level):
                    n = len(panelgroups)
                    sum_fast = col((0.0, 0.0, 0.0))
                    sum_slow = col((0.0, 0.0, 0.0))
                    sum_ori = col((0.0, 0.0, 0.0))

                    # Average the d matrix vectors
                    for pg in panelgroups:
                        sum_fast += col(pg.get_local_fast_axis())
                        sum_slow += col(pg.get_local_slow_axis())
                        sum_ori += col(pg.get_local_origin())
                    sum_fast /= n
                    sum_slow /= n
                    sum_ori /= n

                    # Re-orthogonalize the slow and the fast vectors by rotating around the cross product
                    c = sum_fast.cross(sum_slow)
                    a = sum_fast.angle(sum_slow, deg=True) / 2
                    sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                    sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                    target.set_local_frame(sum_fast, sum_slow, sum_ori)

                if target.is_group():
                    # Recurse
                    for i, target_pg in enumerate(target):
                        average_detectors(target_pg,
                                          [pg[i] for pg in panelgroups],
                                          depth + 1)

            ref_detector = flat_exps[0].detector
            average_detectors(ref_detector.hierarchy(),
                              [e.detector.hierarchy() for e in flat_exps], 0)

        combine = CombineWithReference(
            beam=ref_beam,
            goniometer=ref_goniometer,
            scan=ref_scan,
            crystal=ref_crystal,
            detector=ref_detector,
            params=params,
        )

        # set up global experiments and reflections lists
        from dials.array_family import flex

        reflections = flex.reflection_table()
        global_id = 0
        skipped_expts = 0
        from dxtbx.model.experiment_list import ExperimentList

        experiments = ExperimentList()

        # loop through the input, building up the global lists
        nrefs_per_exp = []
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data
            for i, exp in enumerate(exps):
                sel = refs["id"] == i
                sub_ref = refs.select(sel)
                n_sub_ref = len(sub_ref)
                if (params.output.min_reflections_per_experiment is not None
                        and n_sub_ref <
                        params.output.min_reflections_per_experiment):
                    skipped_expts += 1
                    continue

                nrefs_per_exp.append(n_sub_ref)
                sub_ref["id"] = flex.int(len(sub_ref), global_id)
                if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                    del sub_ref["shoebox"]
                reflections.extend(sub_ref)
                try:
                    experiments.append(combine(exp))
                except ComparisonError as e:
                    # When we failed tolerance checks, give a useful error message
                    (path,
                     index) = find_experiment_in(exp, params.input.experiments)
                    raise Sorry(
                        "Model didn't match reference within required tolerance for experiment {} in {}:"
                        "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                        .format(index, path, str(e)))

                global_id += 1

        if (params.output.min_reflections_per_experiment is not None
                and skipped_expts > 0):
            print("Removed {0} experiments with fewer than {1} reflections".
                  format(skipped_expts,
                         params.output.min_reflections_per_experiment))

        # print number of reflections per experiment
        from libtbx.table_utils import simple_table

        header = ["Experiment", "Number of reflections"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print(st.format())

        # save a random subset if requested
        if (params.output.n_subset is not None
                and len(experiments) > params.output.n_subset):
            subset_exp = ExperimentList()
            subset_refls = flex.reflection_table()
            if params.output.n_subset_method == "random":
                n_picked = 0
                indices = list(range(len(experiments)))
                while n_picked < params.output.n_subset:
                    idx = indices.pop(random.randint(0, len(indices) - 1))
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), n_picked)
                    subset_refls.extend(refls)
                    n_picked += 1
                print(
                    "Selecting a random subset of {0} experiments out of {1} total."
                    .format(params.output.n_subset, len(experiments)))
            elif params.output.n_subset_method == "n_refl":
                if params.output.n_refl_panel_list is None:
                    refls_subset = reflections
                else:
                    sel = flex.bool(len(reflections), False)
                    for p in params.output.n_refl_panel_list:
                        sel |= reflections["panel"] == p
                    refls_subset = reflections.select(sel)
                refl_counts = flex.int()
                for expt_id in range(len(experiments)):
                    refl_counts.append(
                        len(refls_subset.select(
                            refls_subset["id"] == expt_id)))
                sort_order = flex.sort_permutation(refl_counts, reverse=True)
                for expt_id, idx in enumerate(
                        sort_order[:params.output.n_subset]):
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), expt_id)
                    subset_refls.extend(refls)
                print(
                    "Selecting a subset of {0} experiments with highest number of reflections out of {1} total."
                    .format(params.output.n_subset, len(experiments)))

            elif params.output.n_subset_method == "significance_filter":
                from dials.algorithms.integration.stills_significance_filter import (
                    SignificanceFilter, )

                params.output.significance_filter.enable = True
                sig_filter = SignificanceFilter(params.output)
                refls_subset = sig_filter(experiments, reflections)
                refl_counts = flex.int()
                for expt_id in range(len(experiments)):
                    refl_counts.append(
                        len(refls_subset.select(
                            refls_subset["id"] == expt_id)))
                sort_order = flex.sort_permutation(refl_counts, reverse=True)
                for expt_id, idx in enumerate(
                        sort_order[:params.output.n_subset]):
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), expt_id)
                    subset_refls.extend(refls)

            experiments = subset_exp
            reflections = subset_refls

        def save_in_batches(experiments,
                            reflections,
                            exp_name,
                            refl_name,
                            batch_size=1000):
            from dxtbx.command_line.image_average import splitit

            for i, indices in enumerate(
                    splitit(list(range(len(experiments))),
                            (len(experiments) // batch_size) + 1)):
                batch_expts = ExperimentList()
                batch_refls = flex.reflection_table()
                for sub_id, sub_idx in enumerate(indices):
                    batch_expts.append(experiments[sub_idx])
                    sub_refls = reflections.select(
                        reflections["id"] == sub_idx)
                    sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                    batch_refls.extend(sub_refls)
                exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
                ref_filename = os.path.splitext(
                    refl_name)[0] + "_%03d.refl" % i
                self._save_output(batch_expts, batch_refls, exp_filename,
                                  ref_filename)

        def combine_in_clusters(experiments_l, reflections_l, exp_name,
                                refl_name, end_count):
            result = []
            for cluster, experiment in enumerate(experiments_l):
                cluster_expts = ExperimentList()
                cluster_refls = flex.reflection_table()
                for i, expts in enumerate(experiment):
                    refls = reflections_l[cluster][i]
                    refls["id"] = flex.int(len(refls), i)
                    cluster_expts.append(expts)
                    cluster_refls.extend(refls)
                exp_filename = os.path.splitext(exp_name)[0] + (
                    "_cluster%d.expt" % (end_count - cluster))
                ref_filename = os.path.splitext(refl_name)[0] + (
                    "_cluster%d.refl" % (end_count - cluster))
                result.append(
                    (cluster_expts, cluster_refls, exp_filename, ref_filename))
            return result

        # cluster the resulting experiments if requested
        if params.clustering.use:
            clustered = Cluster(
                experiments,
                reflections,
                dendrogram=params.clustering.dendrogram,
                threshold=params.clustering.threshold,
                n_max=params.clustering.max_crystals,
            )
            n_clusters = len(clustered.clustered_frames)

            def not_too_many(keeps):
                if params.clustering.max_clusters is not None:
                    return len(keeps) < params.clustering.max_clusters
                return True

            keep_frames = []
            sorted_keys = sorted(clustered.clustered_frames.keys())
            while len(clustered.clustered_frames) > 0 and not_too_many(
                    keep_frames):
                keep_frames.append(
                    clustered.clustered_frames.pop(sorted_keys.pop(-1)))
            if params.clustering.exclude_single_crystal_clusters:
                keep_frames = [k for k in keep_frames if len(k) > 1]
            clustered_experiments = [[f.experiment for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            clustered_reflections = [[f.reflections for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            list_of_combined = combine_in_clusters(
                clustered_experiments,
                clustered_reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                n_clusters,
            )
            for saveable_tuple in list_of_combined:
                if params.output.max_batch_size is None:
                    self._save_output(*saveable_tuple)
                else:
                    save_in_batches(*saveable_tuple,
                                    batch_size=params.output.max_batch_size)
        else:
            if params.output.max_batch_size is None:
                self._save_output(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                )
            else:
                save_in_batches(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                    batch_size=params.output.max_batch_size,
                )
        return
Example #39
0
  def __init__(self,
      miller_array,
      threshold = 3,
      protein=True,
      print_all=True,
      sigma_inflation=1.0,
      original_data=None):
    self.threshold = 3.0
    assert miller_array.is_xray_intensity_array()
    self.miller_array = miller_array.deep_copy().f_sq_as_f(
      ).average_bijvoet_mates().f_as_f_sq().map_to_asu()
    space_group = self.miller_array.space_group()

    self.absences_table = analyze_absences(
      miller_array=self.miller_array,
      isigi_cut=threshold,
      sigma_inflation=sigma_inflation)
    if (original_data is not None) :
      self.absences_list = absences_list(obs=original_data,
        was_filtered=False)
    else :
      self.absences_list = absences_list(obs=self.miller_array,
        was_filtered=True)

    self.sg_iterator = sgi_iterator(chiral = True,
      intensity_symmetry = \
        space_group.build_derived_reflection_intensity_group(False) )

    self.sg_choices  = []
    self.mean_i      = []
    self.mean_isigi  = []
    self.n           = []
    self.violations  = []
    self.abs_types   = []
    self.tuple_score = []

    score = []

    for sg in self.sg_iterator.list():
      xs = crystal.symmetry(
        unit_cell = self.miller_array.unit_cell(),
        space_group = sg.group())
      tmp_miller = self.miller_array.customized_copy( crystal_symmetry = xs )
      these_absent_millers = tmp_miller.select(
        tmp_miller.sys_absent_flags().data() )

      if these_absent_millers.data().size() > 0:
        tmp_mean_i = flex.mean( these_absent_millers.data() )
        zero_sel = these_absent_millers.sigmas()==0
        these_absent_millers = these_absent_millers.select(~zero_sel)
        #print sg, list(these_absent_millers.indices()), list(these_absent_millers.data())
        tmp_mean_isigi = flex.mean(
          these_absent_millers.data() / these_absent_millers.sigmas() )
        tmp_n = these_absent_millers.data().size()
        tmp_violations = flex.bool( these_absent_millers.data() /
          these_absent_millers.sigmas() > self.threshold ).count( True )
      else:
        tmp_mean_i = 0
        tmp_mean_isigi = 0
        tmp_n = 0
        tmp_violations = 0

      to_be_checked = []
      for s in sg.group():
        #check if this is an operator that causes absences
        tmp =  conditions_for_operator( s )
        if tmp.absence_type() != "None":
          if tmp.absence_type() in self.absences_table.op_name:
            ii = self.absences_table.op_name.index( tmp.absence_type() )
            if tmp.absence_type() not in to_be_checked:
              if equivs.has_key( tmp.absence_type() ):
                if equivs[ tmp.absence_type() ] not in to_be_checked:
                  to_be_checked.append( tmp.absence_type() )
                  tmp_score = self.absences_table.score[ ii ]
              else:
                  to_be_checked.append( tmp.absence_type() )
                  tmp_score = self.absences_table.score[ ii ]

      self.abs_types.append( to_be_checked )
      tuple_score =  self.absences_table.propose( to_be_checked )
      self.tuple_score.append( tuple_score )

      self.sg_choices.append(  sg )
      self.mean_i.append( tmp_mean_i )
      self.mean_isigi.append( tmp_mean_isigi )
      self.n.append( tmp_n )
      self.violations.append( tmp_violations )
    tmp_rows = self.suggest_likely_candidates()
    self.sorted_table = table_utils.simple_table(
      column_headers=['space group', '#  absent', '<Z>_absent',
                      '<Z/sigZ>_absent', '+++', '---', 'score'],
      table_rows=tmp_rows)
Example #40
0
def run_sys_abs_checks(experiments, reflections, d_min=None, significance_level=0.95):
    """Check for systematic absences in the data for the laue group.

    Select the good data, merge, test screw axes and score possible space
    groups. The crystals are updated with the most likely space group.
    """

    if (
        "inverse_scale_factor" in reflections[0]
        and "intensity.scale.value" in reflections[0]
    ):
        logger.info("Attempting to perform absence checks on scaled data")
        reflections = filter_reflection_table(
            reflections[0], intensity_choice=["scale"], d_min=d_min
        )
        reflections["intensity"] = reflections["intensity.scale.value"]
        reflections["variance"] = reflections["intensity.scale.variance"]
    else:
        logger.info(
            "Attempting to perform absence checks on unscaled profile-integrated data"
        )
        reflections = filter_reflection_table(
            reflections[0], intensity_choice=["profile"], d_min=d_min
        )
        reflections["intensity"] = reflections["intensity.prf.value"]
        reflections["variance"] = reflections["intensity.prf.variance"]

    # now merge
    space_group = experiments[0].crystal.get_space_group()
    reflections["asu_miller_index"] = map_indices_to_asu(
        reflections["miller_index"], space_group
    )
    reflections["inverse_scale_factor"] = flex.double(reflections.size(), 1.0)
    merged = (
        _reflection_table_to_iobs(
            reflections, experiments[0].crystal.get_unit_cell(), space_group
        )
        .merge_equivalents(use_internal_variance=False)
        .array()
    )
    merged_reflections = flex.reflection_table()
    merged_reflections["intensity"] = merged.data()
    merged_reflections["variance"] = merged.sigmas() ** 2
    merged_reflections["miller_index"] = merged.indices()

    # Get the laue class from the space group.
    laue_group = str(space_group.build_derived_patterson_group().info())
    logger.info("Laue group: %s", laue_group)
    if laue_group not in laue_groups:
        logger.info("No absences to check for this laue group")
        return

    # Score the screw axes.
    screw_axes, screw_axis_scores = score_screw_axes(
        laue_groups[laue_group], merged_reflections, significance_level
    )

    logger.info(
        simple_table(
            [
                [
                    a.name,
                    "%.3f" % score,
                    str(a.n_refl_used[0]),
                    str(a.n_refl_used[1]),
                    "%.3f" % a.mean_I,
                    "%.3f" % a.mean_I_abs,
                    "%.3f" % a.mean_I_sigma,
                    "%.3f" % a.mean_I_sigma_abs,
                ]
                for a, score in zip(screw_axes, screw_axis_scores)
            ],
            column_headers=[
                "Screw axis",
                "Score",
                "No. present",
                "No. absent",
                "<I> present",
                "<I> absent",
                "<I/sig> present",
                "<I/sig> absent",
            ],
        ).format()
    )

    # Score the space groups from the screw axis scores.
    space_groups, scores = score_space_groups(
        screw_axis_scores, laue_groups[laue_group]
    )

    logger.info(
        simple_table(
            [[sg, "%.4f" % score] for sg, score in zip(space_groups, scores)],
            column_headers=["Space group", "score"],
        ).format()
    )

    # Find the best space group and update the experiments.
    best_sg = space_groups[scores.index(max(scores))]
    logger.info("Recommended space group: %s", best_sg)
    if "enantiomorphic pairs" in laue_groups[laue_group]:
        if best_sg in laue_groups[laue_group]["enantiomorphic pairs"]:
            logger.info(
                "Space group with equivalent score (enantiomorphic pair): %s",
                laue_groups[laue_group]["enantiomorphic pairs"][best_sg],
            )

    new_sg = sgtbx.space_group_info(symbol=best_sg).group()
    for experiment in experiments:
        experiment.crystal.set_space_group(new_sg)
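All of these examples follow the same pattern: pre-format every cell as a string, build a list of rows, and hand the rows plus column headers to simple_table before printing st.format(). A minimal standalone usage sketch (requires libtbx from cctbx; the row values are made up):

from libtbx.table_utils import simple_table

header = ["Space group", "score"]
rows = [["P 21 21 21", "0.9421"], ["P 21 21 2", "0.0412"]]  # illustrative values only
st = simple_table(rows, header)
print(st.format())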
Example #41
0
    def run(self):
        """Execute the script."""

        # Parse the command line
        self.params, _ = self.parser.parse_args(show_diff_phil=True)

        if not self.params.input.experiments:
            self.parser.print_help()
            sys.exit()

        # Try to load the models
        experiments = flatten_experiments(self.params.input.experiments)
        nexp = len(experiments)
        if nexp == 0:
            self.parser.print_help()
            sys.exit("No Experiments found in the input")

        # Set up a plot if requested
        if self.params.plot_filename:
            plt.figure()

        header = [
            "Image",
            "Beam direction (xyz)",
            "Zone axis [uvw]",
            "Angle from\nprevious (deg)",
        ]
        for iexp, exp in enumerate(experiments):
            print("For Experiment id = {}".format(iexp))
            print(exp.beam)
            print(exp.crystal)
            print(exp.scan)

            if self.params.scale == "ewald_sphere_radius":
                scale = 1.0 / exp.beam.get_wavelength()
            elif self.params.scale == "max_cell":
                uc = exp.crystal.get_unit_cell()
                scale = max(uc.parameters()[0:3])
            else:
                scale = 1.0
            print("Beam direction scaled by {0} = {1:.3f} to "
                  "calculate zone axis\n".format(self.params.scale, scale))

            dat = extract_experiment_data(exp, scale)
            images = dat["images"]
            directions = dat["directions"]
            zone_axes = dat["zone_axes"]

            # calculate the orientation offset between each image
            offset = [
                e1.angle(e2, deg=True)
                for e1, e2 in zip(zone_axes[:-1], zone_axes[1:])
            ]
            str_off = ["---"] + ["{:.8f}".format(e) for e in offset]

            rows = []
            for i, d, z, a in zip(images, directions, zone_axes, str_off):
                row = [
                    str(i),
                    "{:.8f} {:.8f} {:.8f}".format(*d.elems),
                    "{:.8f} {:.8f} {:.8f}".format(*z.elems),
                    a,
                ]
                rows.append(row)

            # Print the table
            st = simple_table(rows, header)
            print(st.format())

            # Add to the plot, if requested
            if self.params.plot_filename:
                plt.scatter(images[1:], offset, s=1)

        # Finish and save plot, if requested
        if self.params.plot_filename:
            plt.xlabel("Image number")
            plt.ylabel(r"Angle from previous image $\left(^\circ\right)$")
            plt.title(r"Angle between neighbouring images")
            print("Saving plot to {}".format(self.params.plot_filename))
            plt.savefig(self.params.plot_filename)

        print()
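
# A hedged aside with toy vectors, not output from a real experiment: the
# "Angle from previous (deg)" column above comes from scitbx matrix.col
# vectors (the zone axes), compared pairwise with angle(..., deg=True).
from scitbx import matrix

z1 = matrix.col((0.0, 0.0, 1.0))
z2 = matrix.col((0.0, 0.1, 1.0)).normalize()
print("{:.8f}".format(z1.angle(z2, deg=True)))  # ~5.71 deg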
Example #42
0
  def __call__(self, reflections):
    """Identify outliers in the input and set the centroid_outlier flag.
    Return True if any outliers were detected, otherwise False"""

    if self._verbosity > 0:
      logger.info("Detecting centroid outliers using the {0} algorithm".format(
        type(self).__name__))

    # check the columns are present
    for col in self._cols: assert col in reflections

    sel = reflections.get_flags(reflections.flags.used_in_refinement)
    all_data = reflections.select(sel)
    all_data_indices = sel.iselection()
    nexp = flex.max(all_data['id']) + 1

    jobs = []
    if self._separate_experiments:
      # split the data set by experiment id
      for iexp in xrange(nexp):
        sel = all_data['id'] == iexp
        job = {'id':iexp, 'panel':'all', 'data':all_data.select(sel),
               'indices':all_data_indices.select(sel)}
        jobs.append(job)
    else:
      # keep the whole dataset across all experiment ids
      job = {'id':'all', 'panel':'all', 'data':all_data,
             'indices':all_data_indices}
      jobs.append(job)

    jobs2 = []
    if self._separate_panels:
      # split further by panel id
      for job in jobs:
        data = job['data']
        iexp = job['id']
        indices = job['indices']
        for ipanel in xrange(flex.max(data['panel']) + 1):
          sel = data['panel'] == ipanel
          job = {'id':iexp, 'panel':ipanel, 'data':data.select(sel),
                  'indices':indices.select(sel)}
          jobs2.append(job)
    else:
      # keep the splits as they are
      jobs2 = jobs

    jobs3 = []
    if self.get_block_width() is not None:
      # split into equal-sized phi ranges
      for job in jobs2:
        data = job['data']
        iexp = job['id']
        ipanel = job['panel']
        indices = job['indices']
        phi = data['xyzobs.mm.value'].parts()[2]
        if len(phi) == 0: # detect no data in the job
          jobs3.append(job)
          continue
        phi_low = flex.min(phi)
        phi_range = flex.max(phi) - phi_low
        if phi_range == 0.0: # detect stills and do not split
          jobs3.append(job)
          continue
        bw = self.get_block_width(iexp)
        if bw is None: # detect no split for this experiment
          jobs3.append(job)
          continue
        nblocks = int(round(RAD2DEG * phi_range / bw))
        nblocks = max(1, nblocks)
        real_width = phi_range / nblocks
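        # e.g. if RAD2DEG * phi_range is 90 deg and bw is 18 deg, nblocks = 5
        # and each block spans exactly 18 deg; a 100 deg range would give
        # nblocks = round(100 / 18) = 6 blocks of ~16.7 deg each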
        block_end = 0.0
        for iblock in xrange(nblocks - 1): # all except the last block
          block_start = iblock * real_width
          block_end = (iblock + 1) * real_width
          sel = (phi >=  (phi_low + block_start)) & \
                (phi < (phi_low + block_end))
          job = {'id':iexp, 'panel':ipanel, 'data':data.select(sel),
                 'indices':indices.select(sel),
                 'phi_start':RAD2DEG*(phi_low + block_start),
                 'phi_end':RAD2DEG*(phi_low + block_end)}
          jobs3.append(job)
        # now last block
        sel = phi >= (phi_low + block_end)
        job = {'id':iexp, 'panel':ipanel, 'data':data.select(sel),
               'indices':indices.select(sel),
               'phi_start':RAD2DEG*(phi_low + block_end),
               'phi_end':RAD2DEG*(phi_low + phi_range)}
        jobs3.append(job)
    else:
      # keep the splits as they are
      jobs3 = jobs2

    # Work out the format of the jobs table
    if self._verbosity > 0:
      header = ['Job']
      if self._separate_experiments: header.append('Exp\nid')
      if self._separate_panels: header.append('Panel\nid')
      if self.get_block_width() is not None: header.append('Block range\n(deg)')
      header.extend(['Nref', 'Nout', '%out'])
      rows = []

    # now loop over the lowest level of splits
    for i, job in enumerate(jobs3):

      data = job['data']
      indices = job['indices']
      iexp = job['id']
      ipanel = job['panel']
      nref = len(indices)

      if nref >= self._min_num_obs:

        # get the subset of data as a list of columns
        cols = [data[col] for col in self._cols]

        # determine the position of outliers on this sub-dataset
        outliers = self._detect_outliers(cols)

        # get positions of outliers from the original matches
        ioutliers = indices.select(outliers)

      elif nref > 0:
        # too few reflections in the job
        msg = "For job {0}, fewer than {1} reflections are present.".format(
          i + 1, self._min_num_obs)
        msg += " All reflections flagged as possible outliers."
        if self._verbosity > 0: logger.debug(msg)
        ioutliers = indices

      else:
        # no reflections in the job
        ioutliers = indices

      # set the centroid_outlier flag in the original reflection table
      nout = len(ioutliers)
      if nout > 0:
        reflections.set_flags(ioutliers,
          reflections.flags.centroid_outlier)
        self.nreject += nout

      # Add job data to the table
      if self._verbosity > 0:
        row = [str(i + 1)]
        if self._separate_experiments: row.append(str(iexp))
        if self._separate_panels: row.append(str(ipanel))
        if self.get_block_width() is not None:
          try:
            row.append('{phi_start:.2f} - {phi_end:.2f}'.format(**job))
          except KeyError:
            row.append('{0:.2f} - {1:.2f}'.format(0.0,0.0))
        if nref == 0:
          p100 = 0
        else:
          p100 = 100.0 * nout / nref
          if p100 > 30.0:
            msg = ("{0:3.1f}% of reflections were flagged as outliers from job"
                   " {1}").format(p100, i + 1)
            logger.warning(msg)
        row.extend([str(nref), str(nout), '%3.1f' % p100])
        rows.append(row)

    if self.nreject == 0: return False
    if self._verbosity > 0:
      logger.info("{0} reflections have been flagged as outliers".format(self.nreject))
      logger.debug("Outlier rejections per job:")
      st = simple_table(rows, header)
      logger.debug(st.format())

    return True
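
# A hedged sketch, not the library's actual implementation: one way a concrete
# subclass might implement _detect_outliers, using a simple Tukey (1.5 * IQR)
# fence on each residual column. 'cols' would be the list of flex.double
# columns selected above; the return value is a flex.bool outlier selection.
from scitbx.array_family import flex
from scitbx.math import five_number_summary

def _detect_outliers_tukey(cols, iqr_multiplier=1.5):
  outliers = flex.bool(len(cols[0]), False)
  for col in cols:
    _, q1, _, q3, _ = five_number_summary(col)
    iqr = q3 - q1
    # flag points outside [q1 - k*iqr, q3 + k*iqr] in any column
    outliers = outliers | (col < q1 - iqr_multiplier * iqr) \
                        | (col > q3 + iqr_multiplier * iqr)
  return outliers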
Example #43
0
    def print_panel_rmsd_table(self):
        """print useful output about refinement steps in the form of a simple table"""

        from libtbx.table_utils import simple_table

        if len(self._experiments.scans()) > 1:
            logger.warning(
                "Multiple scans present. Only the first scan will be used "
                "to determine the image width for reporting RMSDs")
        scan = self._experiments.scans()[0]
        try:
            images_per_rad = 1.0 / abs(scan.get_oscillation(deg=False)[1])
        except AttributeError:
            images_per_rad = None

        for idetector, detector in enumerate(self._experiments.detectors()):
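            # a per-panel RMSD breakdown only adds information for multi-panel
            # detectors, so single-panel detectors are skipped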
            if len(detector) == 1:
                continue
            logger.info("\nDetector {} RMSDs by panel:".format(idetector + 1))

            header = ["Panel\nid", "Nref"]
            for (name, units) in zip(self._target.rmsd_names,
                                     self._target.rmsd_units):
                if name == "RMSD_X" or name == "RMSD_Y" and units == "mm":
                    header.append(name + "\n(px)")
                elif (name == "RMSD_Phi" and units == "rad"
                      ):  # convert radians to images for reporting of scans
                    header.append("RMSD_Z" + "\n(images)")
                elif (name == "RMSD_DeltaPsi" and units == "rad"
                      ):  # convert radians to degrees for reporting of stills
                    header.append(name + "\n(deg)")
                else:  # skip RMSDs that cannot be expressed in image/scan space
                    pass

            rows = []
            for ipanel, panel in enumerate(detector):

                px_size = panel.get_pixel_size()
                px_per_mm = [1.0 / e for e in px_size]
                num = self._target.get_num_matches_for_panel(ipanel)
                if num <= 0:
                    continue
                raw_rmsds = self._target.rmsds_for_panel(ipanel)
                if raw_rmsds is None:
                    continue  # skip panels where rmsd cannot be calculated
                rmsds = []
                for (name, units, rmsd) in zip(self._target.rmsd_names,
                                               self._target.rmsd_units,
                                               raw_rmsds):
                    if name == "RMSD_X" and units == "mm":
                        rmsds.append(rmsd * px_per_mm[0])
                    elif name == "RMSD_Y" and units == "mm":
                        rmsds.append(rmsd * px_per_mm[1])
                    elif name == "RMSD_Phi" and units == "rad":
                        rmsds.append(rmsd * images_per_rad)
                    elif name == "RMSD_DeltaPsi" and units == "rad":
                        rmsds.append(rmsd * RAD2DEG)
                rows.append([str(ipanel), str(num)] +
                            ["%.5g" % r for r in rmsds])

            if len(rows) > 0:
                st = simple_table(rows, header)
                logger.info(st.format())

        return
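
# A hedged aside with toy numbers (assumed pixel size and oscillation width),
# illustrating the unit conversions used for the rows above: RMSD_X/Y arrive
# in mm and are reported in pixels, RMSD_Phi arrives in radians and is
# reported in image widths.
import math

px_size_mm = 0.172                        # assumed pixel size
print(0.05 / px_size_mm)                  # a 0.05 mm RMSD_X is ~0.29 px
images_per_rad = 1.0 / math.radians(0.1)  # assumed 0.1 deg oscillation/image
print(0.0005 * images_per_rad)            # a 0.5 mrad RMSD_Phi is ~0.29 images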
Example #44
0
  def print_stats_on_matches(self):
    """Print some basic statistics on the matches"""

    l = self.get_matches()
    nref = len(l)

    from libtbx.table_utils import simple_table
    from scitbx.math import five_number_summary
    x_resid = l['x_resid']
    y_resid = l['y_resid']
    delpsi = l['delpsical.rad']
    w_x, w_y, _ = l['xyzobs.mm.weights'].parts()
    w_delpsi = l['delpsical.weights']

    msg = "\nSummary statistics for {0} observations".format(nref) +\
          " matched to predictions:"
    header = ["", "Min", "Q1", "Med", "Q3", "Max"]
    rows = []
    try:
      row_data = five_number_summary(x_resid)
      rows.append(["Xc - Xo (mm)"] + ["%.4g" % e for e in row_data])
      row_data = five_number_summary(y_resid)
      rows.append(["Yc - Yo (mm)"] + ["%.4g" % e for e in row_data])
      row_data = five_number_summary(delpsi)
      rows.append(["DeltaPsi (deg)"] + ["%.4g" % (e * RAD2DEG) for e in row_data])
      row_data = five_number_summary(w_x)
      rows.append(["X weights"] + ["%.4g" % e for e in row_data])
      row_data = five_number_summary(w_y)
      rows.append(["Y weights"] + ["%.4g" % e for e in row_data])
      row_data = five_number_summary(w_delpsi)
      rows.append(["DeltaPsi weights"] + ["%.4g" % (e * DEG2RAD**2) for e in row_data])
    except IndexError:
      # zero length reflection list
      logger.warning("Unable to calculate summary statistics for zero observations")
      return
    logger.info(msg)
    st = simple_table(rows, header)
    logger.info(st.format())
    logger.info("")

    # sorting is expensive and the following table is only of interest in
    # special cases, so return now if verbosity is not high
    if self._verbosity < 3: return

    if nref < 20:
      logger.debug("Fewer than 20 reflections matched!")
      return

    sl = self._sort_obs_by_residual(l)
    logger.debug("Reflections with the worst 20 positional residuals:")
    header = ['Miller index', 'x_resid', 'y_resid', 'pnl',
              'x_obs', 'y_obs', 'x_obs\nweight', 'y_obs\nweight']
    rows = []
    for i in xrange(20):
      e = sl[i]
      x_obs, y_obs, _ = e['xyzobs.mm.value']
      rows.append(['% 3d, % 3d, % 3d'%e['miller_index'],
                   '%5.3f'%e['x_resid'],
                   '%5.3f'%e['y_resid'],
                   '%d'%e['panel'],
                   '%5.3f'%x_obs,
                   '%5.3f'%y_obs,
                   '%5.3f'%e['xyzobs.mm.weights'][0],
                   '%5.3f'%e['xyzobs.mm.weights'][1]])
    logger.debug(simple_table(rows, header).format())
    logger.debug("")

    return
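
# A hedged guess, since _sort_obs_by_residual is not shown here: the worst-20
# table above only needs the matches ordered by the size of their positional
# residuals, which could be done with a flex sort permutation like this.
from dials.array_family import flex

def _sort_obs_by_residual_sketch(matches):
  resid2 = matches['x_resid'] * matches['x_resid'] + \
           matches['y_resid'] * matches['y_resid']
  perm = flex.sort_permutation(resid2, reverse=True)
  return matches.select(perm)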