def plot_histograms(self, reflections, panel = None, ax = None, bounds = None):
    """Plot a 20-slot histogram of difference-vector norms with summary markers.

    The histogram of ``reflections['difference_vector_norms']`` is drawn
    together with five vertical lines: the observed mean, the Rayleigh mean,
    the histogram mode, the observed RMSD and the Rayleigh RMSD.  The Rayleigh
    quantities use the histogram mode as the sigma parameter.

    Parameters:
      reflections: table providing the 'difference_vector_norms',
        'xyzcal.mm' and 'xyzobs.mm.value' columns.
      panel: unused by this method.
      ax: matplotlib axes to draw on.  When None a new figure and axes are
        created, as the sibling plotting methods do.
      bounds: forwarded unchanged to ``self.get_bounded_data``.
    """
    norms = reflections['difference_vector_norms']
    # First style is for the histogram trace, the rest for the marker lines.
    colors = ['b-', 'g-', 'g--', 'r-', 'b-', 'b--']
    n_slots = 20
    histogram_max = self.params.residuals.histogram_max
    if histogram_max is None:
      h = flex.histogram(norms, n_slots=n_slots)
    else:
      h = flex.histogram(norms.select(norms <= histogram_max), n_slots=n_slots)

    peak_count = flex.max(h.slots())
    n_refl = len(reflections)
    rmsd_obs = math.sqrt((reflections['xyzcal.mm']-reflections['xyzobs.mm.value']).sum_sq()/n_refl)
    # The centre of the most populated slot serves as mode and Rayleigh sigma.
    sigma = mode = h.slot_centers()[list(h.slots()).index(peak_count)]
    mean_obs = flex.mean(norms)
    mean_rayleigh = math.sqrt(math.pi/2)*sigma
    rmsd_rayleigh = math.sqrt(2)*sigma
    markers = [mean_obs, mean_rayleigh, mode, rmsd_obs, rmsd_rayleigh]

    # Pack the histogram trace and the two end points of every marker line
    # into one array so that a single get_bounded_data call handles them all.
    points = flex.vec2_double([(x, y) for x, y in zip(h.slot_centers(), h.slots())])
    n_hist = len(points)
    for value in markers:
      points.extend(flex.vec2_double([(value, 0), (value, peak_count)]))
    points = self.get_bounded_data(points, bounds)

    # Split the array back into the trace followed by one 2-point segment
    # per marker.
    series = [points[:n_hist]]
    for k in range(len(markers)):
      series.append(points[n_hist+(k*2):n_hist+((k+1)*2)])

    if ax is None:
      # The default ax=None previously failed with AttributeError below.
      fig = plt.figure()
      ax = fig.add_subplot(111)

    for segment, style in zip(series, colors):
      ax.plot(segment.parts()[0], segment.parts()[1], style)

    # Only add a legend if the axes do not already carry one.
    if ax.get_legend() is None:
      ax.legend([r"$\Delta$XY", "MeanObs", "MeanRayl", "Mode", "RMSDObs", "RMSDRayl"])
  def histogram(self, reflections, title):
    """Print residual statistics and plot a 100-slot histogram of them.

    Statistics are computed in the units of the input columns and printed
    multiplied by 1000, labelled as microns.  The plot shows the histogram of
    ``reflections['difference_vector_norms']`` plus vertical lines for the
    observed mean, Rayleigh mean, mode, observed RMSD and Rayleigh RMSD.

    Parameters:
      reflections: table providing 'difference_vector_norms', 'xyzcal.mm',
        'xyzobs.mm.value', 'radial_displacements' and
        'transverse_displacements'.
      title: title placed on the plot.
    """
    def report(label, value):
      # A single pre-formatted argument prints the same text whether print
      # is the Python 2 statement or the Python 3 function.
      print("%s %s" % (label, value * 1000))

    data = reflections['difference_vector_norms']
    n_slots = 100
    histogram_max = self.params.residuals.histogram_max
    if histogram_max is None:
      h = flex.histogram(data, n_slots=n_slots)
    else:
      h = flex.histogram(data.select(data <= histogram_max), n_slots=n_slots)
    peak_count = flex.max(h.slots())

    n = len(reflections)
    rmsd = math.sqrt((reflections['xyzcal.mm']-reflections['xyzobs.mm.value']).sum_sq()/n)
    # The centre of the most populated slot serves as mode and Rayleigh sigma.
    sigma = mode = h.slot_centers()[list(h.slots()).index(peak_count)]
    mean = flex.mean(data)
    median = flex.median(data)
    report("RMSD (microns)", rmsd)
    report("Histogram mode (microns):", mode)
    report("Overall mean (microns):", mean)
    report("Overall median (microns):", median)
    mean2 = math.sqrt(math.pi/2)*sigma
    rmsd2 = math.sqrt(2)*sigma
    report("Rayleigh Mean (microns)", mean2)
    report("Rayleigh RMSD (microns)", rmsd2)

    r = reflections['radial_displacements']
    t = reflections['transverse_displacements']
    report("Overall radial RMSD (microns)", math.sqrt(flex.sum_sq(r)/len(r)))
    report("Overall transverse RMSD (microns)", math.sqrt(flex.sum_sq(t)/len(t)))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')

    # Axis limits are only overridden when configured.
    if self.params.residuals.histogram_xmax is not None:
      ax.set_xlim((0,self.params.residuals.histogram_xmax))
    if self.params.residuals.histogram_ymax is not None:
      ax.set_ylim((0,self.params.residuals.histogram_ymax))
    plt.title(title)

    # Marker lines, in the same order as the legend entries below.
    for value, style in ((mean, 'g-'), (mean2, 'g--'), (mode, 'r-'),
                         (rmsd, 'b-'), (rmsd2, 'b--')):
      ax.plot((value, value), (0, peak_count), style)

    ax.legend([r"$\Delta$XY", "MeanObs", "MeanRayl", "Mode", "RMSDObs", "RMSDRayl"])
    ax.set_xlabel("(mm)")
    ax.set_ylabel("Count")
  def plot_unitcells(self, experiments):
    """Plot histograms of the first three unit cell parameters of all crystals.

    Nothing is plotted when ``experiments`` has exactly one entry.  Each of
    the three stacked histograms is restricted to a window around the mean
    whose half-width is four sample standard deviations, but never less
    than 0.5.
    """
    if len(experiments) == 1:
      return

    # One accumulator per parameter, filled crystal by crystal.
    lengths = [flex.double(), flex.double(), flex.double()]
    for xtal in experiments.crystals():
      for store, value in zip(lengths, xtal.get_unit_cell().parameters()[0:3]):
        store.append(value)

    fig, axes = plt.subplots(nrows=3, ncols=1)
    for ax, label, values in zip(axes, 'ABC', lengths):
      stats = flex.mean_and_variance(values)
      half_width = 4*stats.unweighted_sample_standard_deviation()
      if not half_width >= 0.5:
        half_width = 0.5
      lower = stats.mean()-half_width
      upper = stats.mean()+half_width
      shown = values.select((values >= lower) & (values <= upper))
      hist = flex.histogram(shown,n_slots=50)
      ax.plot(hist.slot_centers().as_numpy_array(),hist.slots().as_numpy_array(),'-')
      ax.set_title("%s axis histogram (showing %d of %d xtals). Mean: %7.2f Stddev: %7.2f"%(
        label, len(shown), len(values), stats.mean(),
        stats.unweighted_sample_standard_deviation()))
      ax.set_ylabel("N lattices")
      ax.set_xlabel(r"$\AA$")
      ax.set_xlim((lower, upper))
    plt.tight_layout()
  def plot_cdf_manually(self, reflections, panel = None, ax = None, bounds = None):
    """Plot an observed cumulative distribution against a Rayleigh-form curve.

    The observed curve is built from the norms of
    ``reflections['xyzcal.mm'] - reflections['xyzobs.mm.value']``: the sorted
    norms divided by the largest norm against rank divided by the number of
    norms.  The calculated curve is ``1 - exp(-x**2 / (2*sigma**2))`` sampled
    at 100 points in [0, 1), with sigma taken as the mode of a histogram of
    the norms.

    Parameters:
      reflections: table providing 'xyzcal.mm' and 'xyzobs.mm.value'.
      panel: unused by this method.
      ax: matplotlib axes to draw on; a new figure is created when None.
      bounds: when not None both curves are passed through
        ``self.get_bounded_data``; when None the axes limits and title are
        set instead.
    """
    colors = ['blue', 'green']
    r = (reflections['xyzcal.mm']-reflections['xyzobs.mm.value']).norms()
    h = flex.histogram(r)
    sigma = h.slot_centers()[list(h.slots()).index(flex.max(h.slots()))] # mode

    x_extent = max(r)
    y_extent = len(r)
    xobs = [i/x_extent for i in sorted(r)]
    # float() keeps these fractions from truncating to 0 when the module
    # does not enable true division.
    yobs = [float(i)/y_extent for i in range(y_extent)]
    obs = list(zip(xobs, yobs))

    ncalc = 100
    xcalc = [float(i)/ncalc for i in range(ncalc)]
    ycalc = [1-math.exp((-i**2)/(2*(sigma**2))) for i in xcalc]
    calc = list(zip(xcalc, ycalc))

    data = [flex.vec2_double(obs),
            flex.vec2_double(calc)]

    # Create the axes before first use; previously the ax=None fallback came
    # after ax.set_xlim and so could never be reached without an error.
    if ax is None:
      fig = plt.figure()
      ax = fig.add_subplot(111)

    if bounds is None:
      ax.set_xlim((-1,1))
      ax.set_ylim((-1,1))
      ax.set_title("%s Outlier SP Manually"%self.params.tag)
    else:
      data = [self.get_bounded_data(d, bounds) for d in data]

    for subset,c in zip(data, colors):
        ax.plot(subset.parts()[0], subset.parts()[1], '-', c=c)
  def plot_difference_vector_norms_histograms(self, reflections, panel = None, ax = None, bounds = None):
    """Plot a 50-slot histogram of difference-vector norms scaled by 1000.

    The histogram covers the fixed range 0 to 100 of the scaled values.

    Parameters:
      reflections: table providing 'difference_vector_norms'.
      panel: unused by this method.
      ax: matplotlib axes to draw on; a new figure is created when None.
      bounds: when not None the histogram points are passed through
        ``self.get_bounded_data`` before plotting; when None the raw
        histogram is plotted and a title is set.
    """
    r = reflections['difference_vector_norms']*1000
    h = flex.histogram(r, n_slots=50, data_min=0, data_max=100)

    # Create the axes before first use; previously the ax=None fallback came
    # after ax.set_title and so could never be reached without an error.
    if ax is None:
      fig = plt.figure()
      ax = fig.add_subplot(111)

    if bounds is None:
      x = h.slot_centers().as_numpy_array()
      y = h.slots().as_numpy_array()
      ax.set_title("%s Residual norms histogram"%self.params.tag)
    else:
      d = flex.vec2_double(h.slot_centers(), h.slots().as_double())
      data = self.get_bounded_data(d, bounds)
      x, y = data.parts()

    ax.plot(x, y, '-', c='blue')
Beispiel #6
0
def blank_counts_analysis(reflections, scan, phi_step, fractional_loss):
    """Histogram reflection counts along the scan and mark sparse slots.

    The observed z pixel coordinates are histogrammed over the scan's array
    range.  The slot width corresponds to ``phi_step`` rounded up to a whole
    number of images.  A slot is flagged as potentially blank when its count,
    as a fraction of the largest slot count, is at most ``fractional_loss``.

    Returns a dict with plot "data" and "layout" entries plus
    "blank_regions", as computed by ``blank_regions_from_sel``.

    Raises ValueError when ``reflections`` is empty.
    """
    if len(reflections) == 0:
        raise ValueError("Input contains no reflections")

    _, _, z_px = reflections["xyzobs.px.value"].parts()
    phi = scan.get_angle_from_array_index(z_px)

    # Round the requested step up to a whole number of images.
    osc_width = scan.get_oscillation()[1]
    phi_step = iceil(phi_step / osc_width) * osc_width

    array_range = scan.get_array_range()
    z_first = array_range[0]
    z_last = array_range[1]
    phi_min = scan.get_angle_from_array_index(z_first)
    phi_max = scan.get_angle_from_array_index(z_last)
    assert phi_min <= flex.min(phi)
    assert phi_max >= flex.max(phi)

    # Always use at least one slot.
    n_steps = int(round((phi_max - phi_min) / phi_step))
    if n_steps < 1:
        n_steps = 1
    hist = flex.histogram(z_px, data_min=z_first, data_max=z_last, n_slots=n_steps)
    logger.debug("Histogram:")
    logger.debug(hist.as_str())

    counts = hist.slots()
    relative_counts = counts.as_double() / flex.max(counts)
    potential_blank_sel = relative_counts <= fractional_loss

    slot_edges = [(info.low_cutoff, info.high_cutoff) for info in hist.slot_infos()]
    xmin, xmax = zip(*slot_edges)

    trace = {
        "x": list(hist.slot_centers()),
        "y": list(hist.slots()),
        "xlow": xmin,
        "xhigh": xmax,
        "blank": list(potential_blank_sel),
        "type": "bar",
        "name": "blank_counts_analysis",
    }
    layout = {
        "xaxis": {"title": "z observed (images)"},
        "yaxis": {"title": "Number of reflections"},
        "bargap": 0,
    }
    d = {"data": [trace], "layout": layout}
    d["blank_regions"] = blank_regions_from_sel(d["data"][0])

    return d
Beispiel #7
0
def plot_outliers(data):
    """Build plot dictionaries for outlier positions and their z distribution.

    ``data`` is a mapping with "x", "y", "z" and "image_size" entries.  When
    "z" is empty, both returned plots are empty dicts.  Otherwise the result
    holds a scatter plot of x against y and a bar chart histogram of z, using
    at most 100 slots.
    """
    z_values = data["z"]
    if not z_values:
        return {"outlier_xy_positions": {}, "outliers_vs_z": {}}

    n_slots = min(100, int(len(z_values) * 10))
    hist = flex.histogram(flex.double(z_values), n_slots=n_slots)

    width, height = data["image_size"][0], data["image_size"][1]

    scatter_trace = {
        "x": data["x"],
        "y": data["y"],
        "type": "scatter",
        "mode": "markers",
        "xaxis": "x",
        "yaxis": "y",
    }
    scatter_layout = {
        "title": "Outlier x-y positions",
        "xaxis": {"anchor": "y", "title": "x (px)", "range": [0, width]},
        "yaxis": {"anchor": "x", "title": "y (px)", "range": [0, height]},
    }

    bar_trace = {
        "x": list(hist.slot_centers()),
        "y": list(hist.slots()),
        "type": "bar",
        "name": "outliers vs rotation",
    }
    bar_layout = {
        "title": "Outlier distribution across frames",
        "xaxis": {"title": "frame"},
        "yaxis": {"title": "count"},
        "bargap": 0,
    }

    return {
        "outlier_xy_positions": {"data": [scatter_trace], "layout": scatter_layout},
        "outliers_vs_z": {"data": [bar_trace], "layout": bar_layout},
    }
Beispiel #8
0
def histogram(params, images):
    """Accumulate per-channel histograms over images and write them out.

    Every image is loaded with ``load_raw_image`` into three channels.  Each
    channel is histogrammed into 4096 slots over the range 0 to 65535 and
    merged into a running total for that channel.  The totals are written to
    ``params.output``, one line per slot: slot centre followed by the three
    channel counts.
    """
    from astrotbx.input_output.loader import load_raw_image
    from dials.array_family import flex

    # Running totals for the three channels, in the order they are loaded.
    totals = [None, None, None]

    for path in images:
        r, g, b = load_raw_image(path, params=params.raw)
        per_image = [
            flex.histogram(channel.as_1d(), data_min=0, data_max=65535, n_slots=4096)
            for channel in (r, g, b)
        ]
        for index, current in enumerate(per_image):
            if totals[index] is None:
                totals[index] = current
            else:
                totals[index].update(current)

    with open(params.output, 'w') as f:
        hr, hg, hb = totals
        rows = zip(hr.slot_centers(), hr.slots(), hg.slots(), hb.slots())
        for row in rows:
            f.write('%.2f %d %d %d\n' % row)
Beispiel #9
0
    def run(self, flags, sequence=None, observations=None, **kwargs):
        """Clear the flags of observations that fall in densely populated bins.

        Observed centroid pixel positions are binned on a 2D grid of
        ``self.nbins`` bins.  A histogram of the per-bin counts is turned into
        a normalised cumulative distribution, and a count cutoff is taken at
        the lower edge of the slot before the first two consecutive gradients
        of that distribution that are both below ``self.gradient_cutoff``.
        Observations lying strictly inside any bin whose count exceeds the
        cutoff have their flag set to False.

        Parameters:
            flags: selection array modified in place and returned.
            sequence: unused by this method.
            observations: object providing ``centroids().px_position_xy()``.

        Returns:
            ``flags``.  It is returned unchanged when no cutoff is found.
        """
        obs_x, obs_y = observations.centroids().px_position_xy().parts()

        import numpy as np

        H, xedges, yedges = np.histogram2d(
            obs_x.as_numpy_array(), obs_y.as_numpy_array(), bins=self.nbins
        )

        H_flex = flex.double(H.flatten().astype(np.float64))
        n_slots = min(int(flex.max(H_flex)), 30)
        hist = flex.histogram(H_flex, n_slots=n_slots)

        slots = hist.slots()
        slot_width = hist.slot_width()

        # Running sum of the slot counts, then normalised by its maximum.
        cumulative_hist = flex.long(len(slots))
        running_total = 0
        for i, slot in enumerate(slots):
            running_total += slot
            cumulative_hist[i] = running_total
        cumulative_hist = cumulative_hist.as_double() / flex.max(
            cumulative_hist.as_double()
        )

        # Only the first qualifying position defines the cutoff, so the scan
        # can stop as soon as it is found.
        cutoff = None
        gradients = flex.double()
        for i in range(len(slots) - 1):
            g = (cumulative_hist[i + 1] - cumulative_hist[i]) / slot_width
            gradients.append(g)
            if (
                i > 0
                and g < self.gradient_cutoff
                and gradients[i - 1] < self.gradient_cutoff
            ):
                cutoff = hist.slot_centers()[i - 1] - 0.5 * slot_width
                break

        if cutoff is None:
            # No cutoff was found, so there is no threshold to filter on.
            # Comparing the counts with None would raise on Python 3.
            return flags

        for ix, iy in np.column_stack(np.where(H > cutoff)):
            in_bin = (
                (obs_x > xedges[ix])
                & (obs_x < xedges[ix + 1])
                & (obs_y > yedges[iy])
                & (obs_y < yedges[iy + 1])
            )
            flags.set_selected(in_bin, False)

        return flags
Beispiel #10
0
  def histogram(self, data):
    """Write a 100-slot text histogram of ``data`` and its size to the main log.

    The matplotlib bar chart at the end is permanently disabled.
    """
    from matplotlib import pyplot as plt
    slot_count = 100
    hist = flex.histogram(data=data, n_slots=slot_count)

    # Render the histogram into a string buffer so it can be logged in one go.
    buf = StringIO()
    hist.show(f=buf, prefix="    ", format_cutoffs="%6.2f")
    rendered = buf.getvalue()
    total_line = "Total: %d"%data.size()
    self.logger.main_log(rendered + '\n' + total_line + '\n')

    if False:
      fig = plt.figure()
      plt.bar(hist.slot_centers(), hist.slots(), align="center", width=hist.slot_width())
      plt.show()
    def _show_each(edges):
      """Log range, mean, reference value and a text histogram for each edge.

      ``ref_edges``, ``labels``, ``n_slots`` and ``self`` are taken from the
      enclosing scope.
      """
      for values, reference, name in zip(edges, ref_edges, labels):
        hist = flex.histogram(values, n_slots=n_slots)
        lowest, highest = flex.min(values), flex.max(values)
        stats = flex.mean_and_variance(values)

        summary = (
          "  %s edge"%name,
          "     range:     %6.2f - %.2f"%(lowest, highest),
          "     mean:      %6.2f +/- %6.2f on N = %d" %(stats.mean(), stats.unweighted_sample_standard_deviation(), values.size()),
          "     reference: %6.2f"%reference,
        )
        for line in summary:
          self.logger.main_log(line)

        # The histogram table is rendered to a buffer and logged as one entry.
        buf = StringIO()
        hist.show(f=buf, prefix="    ", format_cutoffs="%6.2f")
        self.logger.main_log(buf.getvalue() + '\n')
Beispiel #12
0
def run(args):
    """Sum the ``hot`` signal over the input images and report how often pixels fire.

    Each unhandled command-line argument is loaded as an image (with the raw
    loader when its extension is 'arw'), passed through ``hot`` and the result
    is accumulated as integers.  A histogram of the accumulated values, with
    one slot centred on each integer from 0 to the number of images, is
    printed.  When ``params.output`` is set the accumulated array is pickled
    to that file.
    """
    from dials.util.options import OptionParser
    import libtbx.load_env

    usage = "%s [options] *ARW" % (
        libtbx.env.dispatcher_name)

    parser = OptionParser(
        usage=usage,
        phil=phil_scope)

    params, options, args = parser.parse_args(show_diff_phil=True,
                                              return_unhandled=True)

    from astrotbx.input_output.loader import load_image_gs, load_raw_image_gs
    from astrotbx.algorithms.star_find import hot
    from dials.array_family import flex

    raws = ['arw']

    total = None

    n = 0
    for arg in args:
        n += 1
        exten = arg.split('.')[-1].lower()
        if exten in raws:
            image = load_raw_image_gs(arg, params.raw)
        else:
            image = load_image_gs(arg)
        signal = hot(image, params).as_1d().as_int()
        if total is None:
            total = signal
        else:
            total += signal

    # Slot edges at half-integers put every integer count 0..n in its own slot.
    h_total = flex.histogram(total.as_double(),
                             data_min=-0.5, data_max=n + 0.5, n_slots=n + 1)

    for c, v in zip(h_total.slot_centers(), h_total.slots()):
        print(c, v)

    # Restore the 2D shape, taken from the last image loaded.
    total.reshape(flex.grid(*image.focus()))

    if params.output:
        try:
            import cPickle as pickle  # Python 2
        except ImportError:
            import pickle
        # HIGHEST_PROTOCOL is a binary format, so the file must be opened in
        # binary mode; text mode fails on Python 3.
        with open(params.output, 'wb') as fout:
            pickle.dump(total, fout, protocol=pickle.HIGHEST_PROTOCOL)
Beispiel #13
0
def gain(image, params):
    """Return a histogram of the index of dispersion over an image.

    ``image`` is a file name; it is read with the raw loader when its
    extension is 'arw' and with the generic loader otherwise.  The index of
    dispersion is computed with a (3, 3) kernel and histogrammed using
    ``params.min``, ``params.max`` and ``params.slots``.
    """
    from dials.algorithms.image import filter
    from astrotbx.input_output.loader import load_image_gs, load_raw_image_gs
    from dials.array_family import flex
    from matplotlib import pyplot

    raw_extensions = ['arw']
    suffix = image.split('.')[-1].lower()
    if suffix not in raw_extensions:
        pixels = load_image_gs(image)
    else:
        pixels = load_raw_image_gs(image, params.raw)

    dispersion_filter = filter.index_of_dispersion_filter(pixels, (3, 3))
    dispersion = dispersion_filter.index_of_dispersion()
    return flex.histogram(
        dispersion.as_1d(),
        data_min=params.min,
        data_max=params.max,
        n_slots=params.slots,
    )
  def image_rmsd_histogram(self, reflections, tag):
    """Plot a histogram and a boxplot of per-id RMSDs.

    Reflections are grouped by their 'id' column.  For every group the RMSD
    of 'difference_vector_norms' is computed and multiplied by 1000 (the
    plots label the result as microns).  Two figures are created: a 40-slot
    histogram and a horizontal boxplot.

    Parameters:
      reflections: table providing 'id' and 'difference_vector_norms'.
      tag: text prepended to both plot titles.
    """
    data = flex.double()
    ids = reflections['id']
    for i in set(ids):
      refls = reflections.select(ids==i)
      if len(refls) == 0:
        continue
      rmsd = math.sqrt(flex.sum_sq(refls['difference_vector_norms'])/len(refls))
      data.append(rmsd)
    data *= 1000
    h = flex.histogram(data, n_slots=40)

    fig = plt.figure()
    # The subplot position is given as an integer, as the sibling plotting
    # methods do; current matplotlib rejects the string form '111'.
    ax = fig.add_subplot(111)
    ax.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')
    plt.title("%sHistogram of image RMSDs"%tag)
    ax.set_xlabel("RMSD (microns)")
    ax.set_ylabel("Count")

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.boxplot(data, vert=False)
    plt.title("%sBoxplot of image RMSDs"%tag)
    ax.set_xlabel("RMSD (microns)")
Beispiel #15
0
def cell_hist():
    """Print histograms of the six unit cell parameters of all crystals.

    Every command-line argument is loaded with ``load_experiment``.  The six
    unit cell parameters of each crystal are collected and histogrammed into
    1000 slots over the range 0 to 100.  One line is printed per slot: the
    slot centre followed by the six counts.
    """
    from dials.array_family import flex
    import sys

    # One accumulator per unit cell parameter, in parameters() order.
    columns = [flex.double() for _ in range(6)]

    for path in sys.argv[1:]:
        for xtal in load_experiment(path).crystals():
            cell = xtal.get_unit_cell().parameters()
            for index, column in enumerate(columns):
                column.append(cell[index])

    hists = [
        flex.histogram(column, data_min=0, data_max=100, n_slots=1000)
        for column in columns
    ]

    # All histograms share the same binning, so the first one supplies the
    # slot centres.
    series = [hists[0].slot_centers()]
    series.extend(hist.slots() for hist in hists)
    for row in zip(*series):
        print("%5.2f %5d %5d %5d %5d %5d %5d" % row)
Beispiel #16
0
def blank_integrated_analysis(reflections, scan, phi_step, fractional_loss):
    """Compute mean I/sigma(I) along the scan and mark weak slots.

    Profile-fitted intensities are used when any reflection carries the
    ``integrated_prf`` flag; otherwise summation intensities of reflections
    flagged ``integrated_sum`` are used.  The observed z pixel coordinates
    are divided into slots over the scan's array range, and the mean
    I/sigma(I) is computed per slot (0 for empty slots).  A slot is flagged
    as potentially blank when its mean is at most ``fractional_loss`` times
    the largest slot mean.

    Returns a dict with plot "data" and "layout" entries plus
    "blank_regions", as computed by ``blank_regions_from_sel``.
    """
    prf_sel = reflections.get_flags(reflections.flags.integrated_prf)
    if prf_sel.count(True) > 0:
        reflections = reflections.select(prf_sel)
        intensities = reflections["intensity.prf.value"]
        variances = reflections["intensity.prf.variance"]
    else:
        sum_sel = reflections.get_flags(reflections.flags.integrated_sum)
        reflections = reflections.select(sum_sel)
        intensities = reflections["intensity.sum.value"]
        variances = reflections["intensity.sum.variance"]

    i_sigi = intensities / flex.sqrt(variances)

    xyz_px = reflections["xyzobs.px.value"]
    x_px, y_px, z_px = xyz_px.parts()
    phi = scan.get_angle_from_array_index(z_px)

    # Round the requested step up to a whole number of images.
    osc = scan.get_oscillation()[1]
    n_images_per_step = iceil(phi_step / osc)
    phi_step = n_images_per_step * osc

    array_range = scan.get_array_range()
    phi_min = flex.min(phi)
    phi_max = flex.max(phi)
    # Always use at least one slot, as blank_counts_analysis does; a phi
    # spread under half a step would otherwise round to zero slots.
    n_steps = max(int(round((phi_max - phi_min) / phi_step)), 1)
    hist = flex.histogram(
        z_px, data_min=array_range[0], data_max=array_range[1], n_slots=n_steps
    )
    logger.debug("Histogram:")
    logger.debug(hist.as_str())

    slot_infos = list(hist.slot_infos())

    mean_i_sigi = flex.double()
    for slot_info in slot_infos:
        sel = (z_px >= slot_info.low_cutoff) & (z_px < slot_info.high_cutoff)
        if sel.count(True) == 0:
            mean_i_sigi.append(0)
        else:
            mean_i_sigi.append(flex.mean(i_sigi.select(sel)))

    potential_blank_sel = mean_i_sigi <= (fractional_loss * flex.max(mean_i_sigi))

    xmin, xmax = zip(
        *[(slot_info.low_cutoff, slot_info.high_cutoff) for slot_info in slot_infos]
    )

    # NOTE(review): the trace name and y-axis title below match those of
    # blank_counts_analysis although "y" holds mean I/sigma(I) here. They are
    # left unchanged in case consumers depend on them; confirm.
    d = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(mean_i_sigi),
                "xlow": xmin,
                "xhigh": xmax,
                "blank": list(potential_blank_sel),
                "type": "bar",
                "name": "blank_counts_analysis",
            }
        ],
        "layout": {
            "xaxis": {"title": "z observed (images)"},
            "yaxis": {"title": "Number of reflections"},
            "bargap": 0,
        },
    }

    blank_regions = blank_regions_from_sel(d["data"][0])
    d["blank_regions"] = blank_regions

    return d
Beispiel #17
0
def get_histogram(d_star_sq, target_n_per_bin=20, max_slots=20, min_slots=5):
  """Histogram ``d_star_sq`` with a slot count derived from its length.

  The slot count is ``len(d_star_sq) // target_n_per_bin``, capped at
  ``max_slots`` and then raised to at least ``min_slots``.
  """
  wanted = len(d_star_sq)//target_n_per_bin
  # The upper limit is applied first, so min_slots wins if the limits conflict.
  clamped = max(min(wanted, max_slots), min_slots)
  return flex.histogram(d_star_sq, n_slots=clamped)
Beispiel #18
0
# Fill the symmetric matrix sij with a pairwise score between z slices.
# For each pair (z0, z1) with z1 before z0 in zs, the score is the number of
# points of slice z1 within a distance of 5.0 of a point of slice z0,
# divided by the mean size of the two slices. Slices with fewer than 5
# points are skipped.
for z0 in zs:
    s0 = z == (z0 + 0.5)
    xy0 = xy.select(s0)
    n0 = xy0.size()
    if n0 < 5:
        continue
    from annlib_ext import AnnAdaptor as ann_adaptor

    ann = ann_adaptor(xy0.as_double().as_1d(), 2)
    for z1 in zs:
        # Stop at the first z1 that is not below z0.
        # NOTE(review): this presumably relies on zs being ascending; confirm.
        if z1 >= z0:
            break
        s1 = z == (z1 + 0.5)
        xy1 = xy.select(s1)
        n1 = xy1.size()
        if n1 < 5:
            continue
        ann.query(xy1.as_double().as_1d())
        d1 = flex.sqrt(ann.distances)
        m01 = (d1 < 5.0).count(True)
        s = m01 / (0.5 * (n0 + n1))
        sij[z0, z1] = s
        sij[z1, z0] = s

# Pickle data is bytes, so the file is opened in binary mode, and the
# context manager closes the handle that was previously leaked.
with open("sij.pickle", "wb") as _fh:
    pickle.dump(sij, _fh)

# Print the distribution of scores: 100 slots over [0, 1].
hij = flex.histogram(sij.as_1d(), data_min=0, data_max=1, n_slots=100)
for _c, _s in zip(hij.slot_centers(), hij.slots()):
    print(_c, _s)
Beispiel #19
0
def run(args):
    """Compare the intensities in two hkl files and save diagnostic plots.

    Exactly two unhandled command-line arguments are required; each is read
    with ``parse_best_hkl`` into a miller array of intensities and sigmas.
    The function then writes, in order:

    - hist_intensities.png and hist_isigi.png: histograms per file
    - imean_vs_resolution.png and isigi_vs_resolution.png: binned means
    - scatter_intensities.png, scatter_sigmas.png, scatter_variances.png and
      scatter_i_sig_i.png: scatter plots of the reflections common to both
      files, after applying the change of basis that gives the most matches.

    The ``args`` parameter is replaced by the unhandled arguments returned by
    the option parser.
    """
    import libtbx.load_env

    usage = "%s [options]" % libtbx.env.dispatcher_name

    parser = OptionParser(
        usage=usage, phil=phil_scope, check_format=False, epilog=help_message
    )

    params, options, args = parser.parse_args(
        show_diff_phil=True, return_unhandled=True
    )

    # Fall back to a default-constructed space group when none is given.
    space_group = params.space_group
    if space_group is None:
        space_group = sgtbx.space_group()
    else:
        space_group = space_group.group()

    # Without a unit cell, use any cell compatible with the space group.
    unit_cell = params.unit_cell
    if unit_cell is None:
        unit_cell = space_group.info().any_compatible_unit_cell(volume=100000)
        print(unit_cell)

    assert len(args) == 2
    from cctbx import crystal, miller

    cs = crystal.symmetry(space_group=space_group, unit_cell=unit_cell)
    intensities = []
    for filename in args:
        hkl, i, sigi = parse_best_hkl(filename)
        ms = miller.set(cs, hkl)
        ma = miller.array(ms, data=i, sigmas=sigi)
        ma.set_observation_type_xray_intensity()
        intensities.append(ma)
        # ma.show_summary()

    # Two subplots, the axes array is 1-d
    from matplotlib import pyplot

    # Histograms of the raw intensities, one subplot per input file.
    ma1, ma2 = intensities
    hist1 = flex.histogram(ma1.data(), n_slots=100)
    hist2 = flex.histogram(ma2.data(), n_slots=100)
    f, axarr = pyplot.subplots(2, sharex=True, figsize=(16, 12))
    axarr[0].bar(
        hist1.slot_centers() - 0.5 * hist1.slot_width(),
        hist1.slots(),
        align="center",
        width=hist1.slot_width(),
        color="black",
        edgecolor=None,
    )
    axarr[1].bar(
        hist2.slot_centers() - 0.5 * hist2.slot_width(),
        hist2.slots(),
        align="center",
        width=hist2.slot_width(),
        color="black",
        edgecolor=None,
    )
    pyplot.savefig("hist_intensities.png")
    pyplot.clf()

    # Histograms of I/sigma(I), one subplot per input file.
    hist1 = flex.histogram(ma1.data() / ma1.sigmas(), n_slots=100)
    hist2 = flex.histogram(ma2.data() / ma2.sigmas(), n_slots=100)
    f, axarr = pyplot.subplots(2, sharex=True, figsize=(16, 12))
    axarr[0].bar(
        hist1.slot_centers() - 0.5 * hist1.slot_width(),
        hist1.slots(),
        align="center",
        width=hist1.slot_width(),
        color="black",
        edgecolor=None,
    )
    axarr[1].bar(
        hist2.slot_centers() - 0.5 * hist2.slot_width(),
        hist2.slots(),
        align="center",
        width=hist2.slot_width(),
        color="black",
        edgecolor=None,
    )
    pyplot.savefig("hist_isigi.png")
    pyplot.clf()

    print(ma1.d_max_min())
    print(ma2.d_max_min())
    ma1.setup_binner(n_bins=20)
    ma2.setup_binner(n_bins=20)

    # Binned mean intensity; the first and last binned values are dropped
    # by the [1:-1] slice before plotting.
    imean1 = ma1.mean(use_binning=True)
    imean2 = ma2.mean(use_binning=True)
    f, axarr = pyplot.subplots(2, sharex=True, figsize=(16, 12))
    axarr[0].plot(imean1.binner.bin_centers(2), imean1.data[1:-1])
    axarr[1].plot(imean2.binner.bin_centers(2), imean2.data[1:-1])
    # Relabel the ticks of the current axes as d spacings; non-positive
    # tick values are labelled 0.
    ax = pyplot.gca()
    xticks = ax.get_xticks()
    xticks_d = ["%.2f" % uctbx.d_star_sq_as_d(ds2) if ds2 > 0 else 0 for ds2 in xticks]
    ax.set_xticklabels(xticks_d)
    pyplot.xlabel("d spacing (A)")
    pyplot.savefig("imean_vs_resolution.png")
    pyplot.clf()

    # Binned I/sigma(I), plotted the same way as the mean intensity above.
    isigi1 = ma1.i_over_sig_i(use_binning=True)
    isigi2 = ma2.i_over_sig_i(use_binning=True)
    f, axarr = pyplot.subplots(2, sharex=True, figsize=(16, 12))
    axarr[0].plot(isigi1.binner.bin_centers(2), isigi1.data[1:-1])
    axarr[1].plot(isigi2.binner.bin_centers(2), isigi2.data[1:-1])
    ax = pyplot.gca()
    xticks = ax.get_xticks()
    xticks_d = ["%.2f" % uctbx.d_star_sq_as_d(ds2) if ds2 > 0 else 0 for ds2 in xticks]
    ax.set_xticklabels(xticks_d)
    pyplot.xlabel("d spacing (A)")
    pyplot.savefig("isigi_vs_resolution.png")
    pyplot.clf()

    # Search the translation-free operators of the derived reflection
    # intensity group for the change of basis of the second data set that
    # gives the largest number of reflections in common with the first.
    best_cb_op = None
    best_count = 0
    for i_op, op in enumerate(
        space_group.build_derived_reflection_intensity_group(False).all_ops()
    ):
        if not op.t().is_zero():
            continue
        cb_op = sgtbx.change_of_basis_op(op)  # .inverse())

        ma1, ma2 = intensities
        ma1, ma2 = ma1.common_sets(ma2.change_basis(cb_op))
        # print cb_op
        # print ma1.size(), ma2.size()
        if ma1.size() > best_count:
            best_cb_op = cb_op
            best_count = ma1.size()

    # NOTE(review): best_cb_op stays None if no operator yields any common
    # reflections; change_basis(None) below would then presumably fail.
    print("Best cb_op: %s (%i matches)" % (best_cb_op, best_count))
    ma1, ma2 = intensities
    ma1, ma2 = ma1.common_sets(ma2.change_basis(best_cb_op))

    from matplotlib import pyplot

    # Scatter plots of the matched reflections; each gets a black line
    # from (0, 0) to (m, m), with m the larger upper axis limit.
    pyplot.scatter(ma1.data(), ma2.data(), marker="+", alpha=0.5)
    m = max(pyplot.xlim()[1], pyplot.ylim()[1])
    pyplot.plot((0, m), (0, m), c="black")
    pyplot.xlabel(args[0])
    pyplot.ylabel(args[1])
    pyplot.savefig("scatter_intensities.png")
    pyplot.clf()

    pyplot.scatter(ma1.sigmas(), ma2.sigmas(), marker="+", alpha=0.5)
    m = max(pyplot.xlim()[1], pyplot.ylim()[1])
    pyplot.plot((0, m), (0, m), c="black")
    pyplot.savefig("scatter_sigmas.png")
    pyplot.clf()

    pyplot.scatter(
        flex.pow2(ma1.sigmas()), flex.pow2(ma2.sigmas()), marker="+", alpha=0.5
    )
    m = max(pyplot.xlim()[1], pyplot.ylim()[1])
    pyplot.plot((0, m), (0, m), c="black")
    pyplot.savefig("scatter_variances.png")
    pyplot.clf()

    isigi1 = ma1.data() / ma1.sigmas()
    isigi2 = ma2.data() / ma2.sigmas()
    pyplot.scatter(isigi1, isigi2, marker="+", alpha=0.5)
    m = max(pyplot.xlim()[1], pyplot.ylim()[1])
    pyplot.plot((0, m), (0, m), c="black")
    pyplot.savefig("scatter_i_sig_i.png")
    pyplot.clf()

    return
def run(args):
  """Count predicted reflections for randomly oriented crystals.

  A single imageset is imported from ``args``; the remaining arguments are
  interpreted as phil parameters, of which ``unit_cell`` and ``space_group``
  are required.  For ``params.n_samples`` orientations obtained from
  ``random_rotation`` the reflections are predicted (limited to
  ``params.d_min`` when that is set) and counted, in parallel over
  ``params.nproc`` processes.  Basic statistics, a 20-slot histogram and the
  raw counts are printed.  When ``params.plot`` is set the histogram is
  saved to predicted_count_histogram.pdf.
  """

  from libtbx.phil import command_line

  from dials.util.command_line import Importer
  from dials.array_family import flex
  # Every print below takes one parenthesised argument, so it gives the same
  # output as a Python 2 statement and as the Python 3 function.
  print(args)
  importer = Importer(args, check_format=False)
  assert len(importer.datablocks) == 1
  sweeps = importer.datablocks[0].extract_imagesets()
  assert len(sweeps) == 1
  sweep = sweeps[0]

  cmd_line = command_line.argument_interpreter(master_params=master_phil_scope)
  working_phil = cmd_line.process_and_fetch(args=importer.unhandled_arguments)
  working_phil.show()

  params = working_phil.extract()
  assert params.unit_cell is not None
  assert params.space_group is not None
  unit_cell = params.unit_cell
  space_group = params.space_group.group()

  import random
  from dxtbx.model.crystal import crystal_model
  from cctbx import crystal, miller
  from scitbx import matrix

  # Seed both random number generators so that runs are reproducible.
  flex.set_random_seed(params.random_seed)
  random.seed(params.random_seed)

  crystal_symmetry = crystal.symmetry(unit_cell=unit_cell,
                                      space_group=space_group)

  # the reciprocal matrix
  B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

  def predict_once(args):
    """Return the number of predicted reflections for orientation args[0]."""
    from dxtbx.model.experiment.experiment_list import Experiment
    U = args[0]
    A = U * B
    direct_matrix = A.inverse()
    cryst_model = crystal_model(direct_matrix[0:3],
                                direct_matrix[3:6],
                                direct_matrix[6:9],
                                space_group=space_group)
    experiment = Experiment(imageset=sweep,
                            beam=sweep.get_beam(),
                            detector=sweep.get_detector(),
                            goniometer=sweep.get_goniometer(),
                            scan=sweep.get_scan(),
                            crystal=cryst_model)
    predicted_reflections = flex.reflection_table.from_predictions(
      experiment)
    miller_indices = predicted_reflections['miller_index']
    miller_set = miller.set(
      crystal_symmetry, miller_indices, anomalous_flag=True)
    if params.d_min is not None:
      resolution_sel = miller_set.d_spacings().data() > params.d_min
      predicted_reflections = predicted_reflections.select(resolution_sel)
    return len(predicted_reflections)

  from libtbx import easy_mp
  # One single-element tuple per sample, as predict_once reads args[0].
  args = [(random_rotation(),) for _ in range(params.n_samples)]
  results = easy_mp.parallel_map(
    func=predict_once,
    iterable=args,
    processes=params.nproc,
    preserve_order=True,
    preserve_exception_message=True)
  n_predicted = flex.double(results)

  print("Basic statistics:")
  from scitbx.math import basic_statistics
  stats = basic_statistics(n_predicted)
  stats.show()

  print("Histogram:")
  hist = flex.histogram(n_predicted, n_slots=20)
  hist.show()

  print("Raw spot counts:")
  print(list(n_predicted))

  if params.plot:
    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    red, blue = '#B2182B', '#2166AC'
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("predicted_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()
def run(args):
    """Count predicted reflections for a set of random crystal orientations.

    The command-line ``args`` are handed to ``Importer``; exactly one
    datablock containing exactly one imageset (sweep) is required.  The
    remaining arguments are interpreted as PHIL parameters against the
    module-level ``master_phil_scope``.  ``unit_cell`` and ``space_group``
    must be set.

    For each of ``params.n_samples`` orientations returned by the
    module-level ``random_rotation()``, reflections are predicted for the
    sweep and (when ``params.d_min`` is set) filtered by resolution.  The
    per-orientation counts are summarised on stdout (basic statistics,
    a 20-slot histogram and the raw counts).  When ``params.plot`` is true a
    bar chart of the histogram is written to
    ``predicted_count_histogram.pdf``.

    :param args: list of command-line argument strings.
    :raises AssertionError: if the input does not contain exactly one
        datablock / one sweep, or unit cell / space group are missing.
    """
    import random

    from cctbx import crystal, miller
    from dials.array_family import flex
    from dials.util.command_line import Importer
    from dxtbx.model.crystal import crystal_model
    from dxtbx.model.experiment.experiment_list import Experiment
    from libtbx import easy_mp
    from libtbx.phil import command_line
    from scitbx import matrix
    from scitbx.math import basic_statistics

    print(args)
    importer = Importer(args, check_format=False)
    assert len(importer.datablocks) == 1
    sweeps = importer.datablocks[0].extract_imagesets()
    assert len(sweeps) == 1
    sweep = sweeps[0]

    cmd_line = command_line.argument_interpreter(master_params=master_phil_scope)
    working_phil = cmd_line.process_and_fetch(args=importer.unhandled_arguments)
    working_phil.show()

    params = working_phil.extract()
    assert params.unit_cell is not None
    assert params.space_group is not None
    unit_cell = params.unit_cell
    space_group = params.space_group.group()

    # Seed both random number generators with the same user-supplied seed.
    flex.set_random_seed(params.random_seed)
    random.seed(params.random_seed)

    crystal_symmetry = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)

    # the reciprocal matrix
    B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

    def predict_once(args):
        """Return the number of predicted reflections for orientation args[0]."""
        U = args[0]
        A = U * B
        direct_matrix = A.inverse()
        cryst_model = crystal_model(
            direct_matrix[0:3],
            direct_matrix[3:6],
            direct_matrix[6:9],
            space_group=space_group,
        )
        experiment = Experiment(
            imageset=sweep,
            beam=sweep.get_beam(),
            detector=sweep.get_detector(),
            goniometer=sweep.get_goniometer(),
            scan=sweep.get_scan(),
            crystal=cryst_model,
        )
        predicted_reflections = flex.reflection_table.from_predictions(experiment)
        if params.d_min is not None:
            miller_set = miller.set(
                crystal_symmetry,
                predicted_reflections["miller_index"],
                anomalous_flag=True,
            )
            resolution_sel = miller_set.d_spacings().data() > params.d_min
            predicted_reflections = predicted_reflections.select(resolution_sel)
        return len(predicted_reflections)

    # One single-element tuple per sample; predict_once unpacks element 0.
    samples = [(random_rotation(),) for _ in range(params.n_samples)]
    results = easy_mp.parallel_map(
        func=predict_once,
        iterable=samples,
        processes=params.nproc,
        preserve_order=True,
        preserve_exception_message=True,
    )
    n_predicted = flex.double(results)

    print("Basic statistics:")
    stats = basic_statistics(n_predicted)
    stats.show()

    print("Histogram:")
    hist = flex.histogram(n_predicted, n_slots=20)
    hist.show()

    print("Raw spot counts:")
    print(list(n_predicted))

    if params.plot:
        # matplotlib is imported lazily so it is only needed when plotting.
        from matplotlib import pyplot
        from matplotlib.backends.backend_pdf import PdfPages

        pyplot.rc("font", family="serif")
        pyplot.rc("font", serif="Times New Roman")

        blue = "#2166AC"
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel("Spot count")
        ax.set_ylabel("Frequency")
        pdf = PdfPages("predicted_count_histogram.pdf")
        try:
            pdf.savefig(fig)
        finally:
            # Always finalise the PDF and release the figure, even on error.
            pdf.close()
            pyplot.close(fig)
Beispiel #22
0
    def plot_uc_histograms(uc_params,
                           outliers,
                           steps_per_angstrom=20,
                           plot_name='uc_histograms.png'):
        """Plot 2D and 1D histograms of the a, b, c unit cell lengths.

        The top row of the 2x3 figure shows 2D histograms of (a, b), (b, c)
        and (c, a); the bottom row shows, for each of a, b and c, a 1D
        histogram of all values (black) overlaid with the histogram of the
        values not flagged in ``outliers`` (red).  The figure is written to
        ``plot_name``.

        :param uc_params: sequence whose first three items are the flex
            arrays of a, b and c values.
        :param outliers: boolean flex selection; ``~outliers`` selects the
            inliers.  # presumably the same length as each parameter array
        :param steps_per_angstrom: number of 1D histogram slots per unit of
            the parameter (Angstrom, per the axis labels).
        :param plot_name: output file name passed to ``Figure.savefig``.
        """
        import math

        import numpy as np
        from matplotlib import pyplot as plt

        plt.style.use('ggplot')
        f, ax = plt.subplots(nrows=2, ncols=3, figsize=(12, 8))
        a, b, c = uc_params[:3]

        def uc_param_hist2d(p1, p2, ax):
            """Draw a 100x100-bin 2D histogram of p1 vs p2, hiding empty bins."""
            nbins = 100
            H, xedges, yedges = np.histogram2d(p1, p2, bins=nbins)
            # rot90 followed by flipud transposes H so that rows correspond
            # to the y edges, as pcolormesh expects.
            H = np.rot90(H)
            H = np.flipud(H)
            Hmasked = np.ma.masked_where(H == 0, H)
            ax.pcolormesh(xedges, yedges, Hmasked)

        uc_param_hist2d(a, b, ax[0][0])
        uc_param_hist2d(b, c, ax[0][1])
        uc_param_hist2d(c, a, ax[0][2])

        for i in range(3):
            mmm = flex.min_max_mean_double(uc_params[i])
            # Work in whole steps so the slot count is exact; deriving it
            # from (Amax - Amin) * steps can lose a slot to float rounding.
            lo_step = math.floor(mmm.min * steps_per_angstrom)
            hi_step = math.floor(mmm.max * steps_per_angstrom)
            Amin = lo_step / steps_per_angstrom
            Amax = hi_step / steps_per_angstrom
            n_slots = int(hi_step - lo_step)
            # NOTE(review): Amax is rounded *down*, so values above it fall
            # outside the histogram range, and n_slots is 0 when the spread
            # is below one step -- confirm this is intended.
            hist = flex.histogram(uc_params[i], Amin, Amax, n_slots=n_slots)
            hist_inliers = flex.histogram(uc_params[i].select(~outliers),
                                          Amin,
                                          Amax,
                                          n_slots=n_slots)
            # All data in black first, inliers in red drawn on top.
            for h, colour in ((hist, 'black'), (hist_inliers, 'red')):
                ax[1][i].bar(h.slot_centers(),
                             h.slots(),
                             align='center',
                             width=h.slot_width(),
                             zorder=10,
                             color=colour,
                             edgecolor=None,
                             linewidth=0)

        # Raw strings: '\A' is not a valid escape sequence in a normal string.
        ax[0][0].set_ylabel(r'b ($\AA$)')
        ax[0][1].set_ylabel(r'c ($\AA$)')
        ax[0][2].set_ylabel(r'a ($\AA$)')
        ax[1][0].set_xlabel(r'a ($\AA$)')
        ax[1][1].set_xlabel(r'b ($\AA$)')
        ax[1][2].set_xlabel(r'c ($\AA$)')

        # The layout must be applied before saving to affect the output file.
        f.tight_layout()
        f.savefig(plot_name)
        plt.close(f)
Beispiel #23
0
def normal_probability_plot(data, label=None):
    """Plot the distribution of normal probabilities of errors.

    Builds two plot descriptions from ``data["delta_hl"]``:

    * a heatmap (200x200-bin 2D histogram) of the sorted values against the
      normal quantiles ``norm.ppf`` evaluated at evenly spaced probabilities,
      with empty bins set to NaN, plus a reference line from (-5, -5) to
      (5, 5);
    * a 100-slot histogram of the values with a unit-variance, zero-mean
      gaussian scaled by the number of values and the slot width.

    Args:
        data: mapping providing a ``"delta_hl"`` numpy array.
        label: optional label; when given it is appended to the dictionary
            keys and to the plot titles.

    Returns:
        dict with two entries (normal probability plot and histogram), each
        holding ``"data"``, ``"layout"`` and ``"help"`` items.
    """
    y = np.sort(data["delta_hl"])
    n = y.size
    # Evenly spaced probabilities centred in each of the n intervals.
    delta = 0.5 / n
    v = np.linspace(start=delta, stop=1.0 - delta, endpoint=True, num=n)
    x = norm.ppf(v)

    H, xedges, yedges = np.histogram2d(x, y, bins=(200, 200))
    # Empty bins become NaN (np.nan; the np.NAN alias was removed in NumPy 2.0).
    z = np.where(H != 0, H, np.nan)

    # also make a histogram
    histy = flex.histogram(flumpy.from_numpy(y), n_slots=100)
    # make a gaussian for reference also
    centers = histy.slot_centers()
    width = centers[1] - centers[0]
    gaussian = [
        n * width * math.exp(-(sc ** 2) / 2.0) / ((2.0 * math.pi) ** 0.5)
        for sc in centers
    ]

    key = "normal_distribution_plot"
    title = "Normal probability plot with error model applied"
    key_hist = "nor_dev_hist"
    title_hist = "Normal deviations with error model applied"
    if label is not None:
        key = f"normal_distribution_plot_{label}"
        title = title + f" (error model {label})"
        key_hist = f"nor_dev_hist_{label}"
        title_hist = title_hist + f" (error model {label})"

    return {
        key: {
            "data": [
                {
                    "x": xedges.tolist(),
                    "y": yedges.tolist(),
                    "z": z.transpose().tolist(),
                    "type": "heatmap",
                    "name": "normalised deviations",
                    "colorbar": {
                        "title": "Number of reflections",
                        "titleside": "right",
                    },
                    "colorscale": "Jet",
                },
                {
                    "x": [-5, 5],
                    "y": [-5, 5],
                    "type": "scatter",
                    "mode": "lines",
                    "name": "z = m",
                    "color": "rgb(0,0,0)",
                },
            ],
            "layout": {
                "title": title,
                "xaxis": {"anchor": "y", "title": "Order statistic medians, m"},
                "yaxis": {"anchor": "x", "title": "Ordered responses, z"},
            },
            "help": """\
This plot shows the normalised devations (of each reflection from the
group-weighted mean), sorted in order and plotted against the expected order
based on a normal distribution model. A true normal distribution of deviations
would give the straight line indicated. If the errors are well described by
this model, the ordered responses should closely fit the straight line to
high absolute values of x (>3), where there is typically a deviation away from
the line due to wide tails of the distribution.
""",
        },
        key_hist: {
            "data": [
                {
                    "x": list(histy.slot_centers()),
                    "y": list(histy.slots()),
                    "type": "bar",
                    "name": "dataset normalised deviations",
                },
                {
                    "x": list(histy.slot_centers()),
                    "y": gaussian,
                    "type": "scatter",
                    "name": "Ideal normal distribution",
                },
            ],
            "layout": {
                "title": title_hist,
                "xaxis": {"anchor": "y", "title": "Normalised deviation"},
                "yaxis": {"anchor": "x", "title": "Number of reflections"},
            },
            "help": """\
This plot shows the distribution of normalised devations (of each reflection
from the group-weighted mean), for the reflections used to minimise the error
model. A true normal distribution is indicated.
""",
        },
    }
Beispiel #24
0
def _sampled_direction_grid(
    absorption_data, lookup_key, reflection_table, column, grid_shape, value
):
    """Return a NaN grid with ``value`` set at every sampled direction.

    If ``lookup_key`` is present in ``absorption_data`` its stored lookup
    indices are used; they are decoded with 720 points per row and halved
    (two points per degree -> one).  Otherwise the indices are recalculated
    from ``reflection_table[column]`` at one point per degree (360 per row).

    The grid is indexed ``[azimuth, polar]``.
    """
    grid = np.full(grid_shape, np.nan)
    # note, the lookup is only calculated for large datasets, so
    # for small datasets we need to calculate again.
    if lookup_key not in absorption_data:
        lookup = calc_lookup_index(
            calc_theta_phi(reflection_table[column]), points_per_degree=1
        )
        idx_polar, idx_azimuth = np.divmod(np.unique(lookup), 360)
    else:
        lookup = np.unique(absorption_data[lookup_key])
        # x is phi, y is theta
        idx_polar, idx_azimuth = np.divmod(lookup, 720)
        idx_polar = idx_polar // 2  # convert from two points per degree to one
        idx_azimuth = idx_azimuth // 2
    grid[idx_azimuth, idx_polar] = value
    return grid


def plot_absorption_plots(physical_model, reflection_table=None):
    """Make a number of plots to help with the interpretation of the
    absorption correction.

    Always produced:

    * ``absorption_surface`` - the correction evaluated on a 100 x 50
      (azimuth x polar) grid from the spherical harmonic parameters of
      ``physical_model.components["absorption"]``;
    * ``undiffracted_absorption_surface`` - the same sum restricted to the
      even-``l`` terms.

    Produced only when ``reflection_table`` is truthy:

    * ``vector_directions`` - one-degree grids marking the sampled s1
      (value 1) and s0 (value 2) directions;
    * ``absorption_corrections`` - histogram of the component's scales.

    Args:
        physical_model: model exposing ``components["absorption"]`` with
            ``parameters``, ``data`` and ``calculate_scales()``.
        reflection_table: optional table providing ``"s1c"`` and ``"s0c"``
            columns (only read when the lookups are not stored on the
            component).

    Returns:
        dict mapping plot name to a dict with ``"data"`` and ``"layout"``
        (and, for most plots, ``"help"``).
    """
    absorption = physical_model.components["absorption"]
    params = absorption.parameters

    # First plot the absorption surface
    order = int(-1.0 + ((1.0 + len(params)) ** 0.5))
    lfg = scitbxmath.log_factorial_generator(2 * order + 1)
    STEPS = 50
    azimuth_ = np.linspace(0, 2 * np.pi, 2 * STEPS)
    polar_ = np.linspace(0, np.pi, STEPS)
    grid_shape = (azimuth_.size, polar_.size)
    Intensity = np.ones(grid_shape)
    undiffracted_intensity = np.ones(grid_shape)
    counter = 0
    sqrt2 = math.sqrt(2)
    nsssphe = scitbxmath.nss_spherical_harmonics(order, 50000, lfg)
    for l in range(1, order + 1):
        for m in range(-l, l + 1):
            for it, t in enumerate(polar_):
                for ip, p in enumerate(azimuth_):
                    Ylm = nsssphe.spherical_harmonic(l, abs(m), t, p)
                    if m < 0:
                        r = sqrt2 * ((-1) ** m) * Ylm.imag
                    elif m == 0:
                        assert Ylm.imag == 0.0
                        r = Ylm.real
                    else:
                        r = sqrt2 * ((-1) ** m) * Ylm.real
                    Intensity[ip, it] += params[counter] * r
                    # for the undiffracted intensity, we want to add the correction
                    # at each point to the parity conjugate. We can use the fact
                    # that the odd l terms are parity odd, and even are even, to
                    # just calculate the even terms as follows
                    if l % 2 == 0:
                        undiffracted_intensity[ip, it] += params[counter] * r
            # one parameter per (l, m) pair
            counter += 1

    d = {
        "absorption_surface": {
            "data": [
                {
                    "x": list(azimuth_ * 180.0 / np.pi),
                    "y": list(polar_ * 180.0 / np.pi),
                    "z": Intensity.T.tolist(),
                    "type": "heatmap",
                    "colorscale": "Viridis",
                    "colorbar": {"title": "inverse <br>scale factor"},
                    "name": "absorption surface",
                    "xaxis": "x",
                    "yaxis": "y",
                }
            ],
            "layout": {
                "title": "Absorption correction surface",
                "xaxis": {
                    "domain": [0, 1],
                    "anchor": "y",
                    "title": "azimuthal angle (degrees)",
                },
                "yaxis": {
                    "domain": [0, 1],
                    "anchor": "x",
                    "title": "polar angle (degrees)",
                },
            },
            "help": absorption_help_msg,
        }
    }

    d["undiffracted_absorption_surface"] = {
        "data": [
            {
                "x": list(azimuth_ * 180.0 / np.pi),
                "y": list(polar_ * 180.0 / np.pi),
                "z": undiffracted_intensity.T.tolist(),
                "type": "heatmap",
                "colorscale": "Viridis",
                "colorbar": {"title": "inverse <br>scale factor"},
                "name": "Undiffracted absorption correction",
                "xaxis": "x",
                "yaxis": "y",
            }
        ],
        "layout": {
            "title": "Undiffracted absorption correction",
            "xaxis": {
                "domain": [0, 1],
                "anchor": "y",
                "title": "azimuthal angle (degrees)",
            },
            "yaxis": {
                "domain": [0, 1],
                "anchor": "x",
                "title": "polar angle (degrees)",
            },
        },
        "help": """
This plot shows the calculated relative absorption for a paths travelling
straight through the crystal at a given direction in a crystal-fixed frame of
reference (in spherical coordinates). This gives an indication of the effective
shape of the crystal for absorbing x-rays. In this plot, the pole (polar angle 0)
corresponds to the laboratory x-axis.
""",
    }

    if not reflection_table:
        return d

    # now plot the directions of the scattering vectors

    d["vector_directions"] = {
        "data": [],
        "layout": {
            "title": "Scattering vectors in crystal frame",
            "xaxis": {
                "domain": [0, 1],
                "anchor": "y",
                "title": "azimuthal angle (degrees)",
                "range": [0, 360],
            },
            "yaxis": {
                "domain": [0, 1],
                "anchor": "x",
                "title": "polar angle (degrees)",
                "range": [0, 180],
            },
            "coloraxis": {
                "showscale": False,
            },
        },
        "help": """
This plot shows the scattering vector directions in the crystal reference frame
used to determine the absorption correction. The s0 vectors are plotted in yellow,
the s1 vectors are plotted in teal. This gives an indication of which parts of
the absorption correction surface are sampled when determining the absorption
correction. In this plot, the pole (polar angle 0) corresponds to the laboratory
x-axis.""",
    }

    STEPS = 180  # do one point per degree
    azimuth_ = np.linspace(0, 2 * np.pi, 2 * STEPS)
    polar_ = np.linspace(0, np.pi, STEPS)
    grid_shape = (azimuth_.size, polar_.size)

    # s1 directions are marked with 1, s0 directions with 2; both traces share
    # the fixed colour range 0..2 so that they get distinct colours.
    for lookup_key, column, value in (
        ("s1_lookup", "s1c", 1),
        ("s0_lookup", "s0c", 2),
    ):
        sampled = _sampled_direction_grid(
            absorption.data, lookup_key, reflection_table, column, grid_shape, value
        )
        d["vector_directions"]["data"].append(
            {
                "x": list(azimuth_ * 180.0 / np.pi),
                "y": list(polar_ * 180.0 / np.pi),
                "z": sampled.T.tolist(),
                "type": "heatmap",
                "colorscale": "Viridis",
                "showscale": False,
                "xaxis": "x",
                "yaxis": "y",
                "zmin": 0,
                "zmax": 2,
            }
        )

    scales = absorption.calculate_scales()
    hist = flex.histogram(scales, n_slots=min(100, int(scales.size() * 10)))

    d["absorption_corrections"] = {
        "data": [
            {
                "x": list(hist.slot_centers()),
                "y": list(hist.slots()),
                "type": "bar",
                "name": "Applied absorption corrections",
            },
        ],
        "layout": {
            "title": "Applied absorption corrections",
            "xaxis": {"anchor": "y", "title": "Inverse scale factor"},
            "yaxis": {"anchor": "x", "title": "Number of reflections"},
        },
    }

    return d
Beispiel #25
0
def run(args):
  """Process every item of ``args`` with ``run_once`` and summarise the results.

  ``run_once`` is mapped over ``args`` in parallel; ``None`` results are
  skipped.  From the remaining results this function writes:

  * ``results.txt`` - one row per result with spot counts, number of
    lattices and d-spacing percentiles;
  * ``results_indexed.txt`` - one row per entry of the collected cell
    parameters with cell, number of indexed reflections, d_min and rmsds;
  * ``spot_count_histogram.pdf``, ``n_indexed_lattices_histogram.pdf``,
    ``n_integrated_lattices_histogram.pdf`` (only if at least one lattice
    was integrated) and ``cell_parameters.pdf``.

  Histograms and cell parameter statistics are also printed to stdout.

  The file writing and printing use forms valid under both Python 2 and
  Python 3 (no ``print`` statements, text-mode files).

  :param args: iterable of work items passed one at a time to ``run_once``.
  """
  sweep_directories = []
  templates = []
  n_strong_spots = flex.int()
  n_strong_spots_dmin_4 = flex.int()
  d_strong_spots_99th_percentile = flex.double()
  d_strong_spots_95th_percentile = flex.double()
  d_strong_spots_50th_percentile = flex.double()
  n_unindexed_spots = flex.int()
  n_indexed_lattices = flex.int()
  n_integrated_lattices = flex.int()
  sweep_dir_cryst = flex.std_string()

  rmsds = flex.vec3_double()
  cell_params = flex.sym_mat3_double()
  n_indexed = flex.double()
  d_min_indexed = flex.double()

  nproc = easy_mp.get_processes(libtbx.Auto)
  results = easy_mp.parallel_map(
    func=run_once,
    iterable=args,
    processes=nproc,
    method="multiprocessing",
    preserve_order=True,
    asynchronous=True,
    preserve_exception_message=True,
  )

  for result in results:
    if result is None: continue
    # per-sweep quantities: exactly one value per result
    sweep_directories.append(result.sweep_dir)
    templates.append(result.template)
    n_strong_spots.append(result.n_strong_spots)
    n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
    n_unindexed_spots.append(result.n_unindexed_spots)
    n_indexed_lattices.append(result.n_indexed_lattices)
    n_integrated_lattices.append(result.n_integrated_lattices)
    d_strong_spots_50th_percentile.append(result.d_strong_spots_50th_percentile)
    d_strong_spots_95th_percentile.append(result.d_strong_spots_95th_percentile)
    d_strong_spots_99th_percentile.append(result.d_strong_spots_99th_percentile)
    # per-crystal quantities: each result contributes an array of values
    cell_params.extend(result.cell_params)
    n_indexed.extend(result.n_indexed)
    d_min_indexed.extend(result.d_min_indexed)
    rmsds.extend(result.rmsds)
    sweep_dir_cryst.extend(result.sweep_dir_cryst)

  def write_table(filename, rows):
    # Write the formatted table followed by a newline, as text.
    table = table_utils.format(rows, has_header=True, justify='right')
    with open(filename, 'w') as f:
      f.write("%s\n" % table)

  table_data = [('sweep_dir', 'template', '#strong_spots', '#unindexed_spots', '#lattices',
                 'd_spacing_50th_percentile', 'd_spacing_95th_percentile',
                 'd_spacing_99th_percentile',)]
  for i in range(len(sweep_directories)):
    table_data.append((sweep_directories[i],
                       templates[i],
                       str(n_strong_spots[i]),
                       str(n_unindexed_spots[i]),
                       str(n_indexed_lattices[i]),
                       str(d_strong_spots_50th_percentile[i]),
                       str(d_strong_spots_95th_percentile[i]),
                       str(d_strong_spots_99th_percentile[i]),
                       ))
  write_table('results.txt', table_data)

  table_data = [('sweep_dir', 'cell_a', 'cell_b', 'cell_c', 'alpha', 'beta', 'gamma',
                 '#indexed_reflections', 'd_min_indexed',
                 'rmsd_x', 'rmsd_y', 'rmsd_phi')]
  for i in range(len(cell_params)):
    cell = cell_params[i]
    rmsd = rmsds[i]
    table_data.append((sweep_dir_cryst[i],
                       str(cell[0]), str(cell[1]), str(cell[2]),
                       str(cell[3]), str(cell[4]), str(cell[5]),
                       str(n_indexed[i]),
                       str(d_min_indexed[i]),
                       str(rmsd[0]), str(rmsd[1]), str(rmsd[2]),
                       ))
  write_table('results_indexed.txt', table_data)

  # a, b, c, alpha, beta, gamma as six separate arrays
  cell_columns = [flex.double([cell[k] for cell in cell_params])
                  for k in range(6)]

  from matplotlib import pyplot
  from matplotlib.backends.backend_pdf import PdfPages

  pyplot.rc('font', family='serif')
  pyplot.rc('font', serif='Times New Roman')

  blue = '#2166AC'

  def save_pdf(fig, filename):
    pdf = PdfPages(filename)
    pdf.savefig(fig)
    pdf.close()

  def plot_lattice_counts(counts, xlabel, filename):
    # One slot per lattice count; bars are drawn at integer positions.
    hist = flex.histogram(counts.as_double(), n_slots=flex.max(counts))
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    n_bars = int(hist.data_max())
    ax.bar(range(n_bars), hist.slots(),
           width=0.75*hist.slot_width(), align='center',
           color=blue, edgecolor=blue)
    ax.set_xlim(-0.5, hist.data_max()-0.5)
    ax.set_xticks(range(0, n_bars))
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')
    save_pdf(fig, filename)

  hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
  hist.show()
  fig = pyplot.figure()
  ax = fig.add_subplot(1,1,1)
  ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
         color=blue, edgecolor=blue)
  ax.set_xlabel('Spot count')
  ax.set_ylabel('Frequency')
  save_pdf(fig, "spot_count_histogram.pdf")

  # NOTE(review): unlike the integrated-lattice plot below, this one is not
  # guarded against a maximum of 0 (which gives n_slots=0) -- confirm whether
  # the same guard is wanted here.
  plot_lattice_counts(n_indexed_lattices,
                      'Number of indexed lattices',
                      "n_indexed_lattices_histogram.pdf")

  if flex.max(n_integrated_lattices) > 0:
    plot_lattice_counts(n_integrated_lattices,
                        'Number of integrated lattices',
                        "n_integrated_lattices_histogram.pdf")

  fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
  for i, cell_param in enumerate(cell_columns):
    ax = axes.flat[i]
    flex.min_max_mean_double(cell_param).show()
    print(flex.median(cell_param))
    hist = flex.histogram(cell_param, n_slots=20)
    hist.show()
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Cell parameter')
    ax.set_ylabel('Frequency')
  pyplot.tight_layout()
  save_pdf(fig, "cell_parameters.pdf")
Beispiel #26
0
def normal_probability_plot(data):
    """Plot the distribution of normal probabilities of errors.

    Builds two plot descriptions from ``data["delta_hl"]``:

    * ``"normal_distribution_plot"`` - a heatmap (200x200-bin 2D histogram)
      of the sorted values against normal quantiles evaluated at the
      plotting positions ``(i + 1 - a) / (n + 1 - 2a)``, with empty bins set
      to NaN, plus a reference line from (-5, -5) to (5, 5);
    * ``"nor_dev_hist"`` - a 100-slot histogram of the values with a
      zero-mean, unit-variance gaussian scaled by the number of values and
      the slot width.

    Args:
        data: mapping providing a ``"delta_hl"`` sequence of floats.

    Returns:
        dict with the two entries above, each holding ``"data"``,
        ``"layout"`` and ``"help"`` items.
    """
    norm = distributions.normal_distribution()

    n = len(data["delta_hl"])
    # Plotting-position constant; written with float literals so the value
    # is 0.375 even under Python 2 integer division.
    a = 3.0 / 8.0 if n <= 10 else 0.5

    y = flex.sorted(flex.double(data["delta_hl"]))
    x = [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)]

    H, xedges, yedges = np.histogram2d(np.array(x),
                                       y.as_numpy_array(),
                                       bins=(200, 200))
    # Empty bins become NaN (np.nan; the np.NAN alias was removed in NumPy 2.0).
    z = np.where(H != 0, H, np.nan)

    # also make a histogram
    histy = flex.histogram(y, n_slots=100)
    # make a gaussian for reference also
    n = y.size()
    centers = histy.slot_centers()
    width = centers[1] - centers[0]
    gaussian = [
        n * width * math.exp(-(sc**2) / 2.0) / ((2.0 * math.pi)**0.5)
        for sc in centers
    ]

    return {
        "normal_distribution_plot": {
            "data": [
                {
                    "x": xedges.tolist(),
                    "y": yedges.tolist(),
                    "z": z.transpose().tolist(),
                    "type": "heatmap",
                    "name": "normalised deviations",
                    "colorbar": {
                        "title": "Number of reflections",
                        "titleside": "right",
                    },
                    "colorscale": "Jet",
                },
                {
                    "x": [-5, 5],
                    "y": [-5, 5],
                    "type": "scatter",
                    "mode": "lines",
                    "name": "z = m",
                    "color": "rgb(0,0,0)",
                },
            ],
            "layout": {
                "title": "Normal probability plot with error model applied",
                "xaxis": {
                    "anchor": "y",
                    "title": "Order statistic medians, m"
                },
                "yaxis": {
                    "anchor": "x",
                    "title": "Ordered responses, z"
                },
            },
            "help":
            """\
This plot shows the normalised devations (of each reflection from the
group-weighted mean), sorted in order and plotted against the expected order
based on a normal distribution model. A true normal distribution of deviations
would give the straight line indicated. If the errors are well described by
this model, the ordered responses should closely fit the straight line to
high absolute values of x (>3), where there is typically a deviation away from
the line due to wide tails of the distribution.
""",
        },
        "nor_dev_hist": {
            "data": [
                {
                    "x": list(histy.slot_centers()),
                    "y": list(histy.slots()),
                    "type": "bar",
                    "name": "dataset normalised deviations",
                },
                {
                    "x": list(histy.slot_centers()),
                    "y": gaussian,
                    "type": "scatter",
                    "name": "Ideal normal distribution",
                },
            ],
            "layout": {
                "title": "Normal deviations with error model applied",
                "xaxis": {
                    "anchor": "y",
                    "title": "Normalised deviation"
                },
                "yaxis": {
                    "anchor": "x",
                    "title": "Number of reflections"
                },
            },
            "help":
            """\
This plot shows the distribution of normalised devations (of each reflection
from the group-weighted mean), for the reflections used to minimise the error
model. A true normal distribution is indicated.
""",
        },
    }
Beispiel #27
0
from __future__ import print_function
from dials.array_family import flex
import cPickle as pickle
import sys

# Usage: <script> REFL_IN REFL_OUT PARTIALITY_LIMIT
# Reads a pickled reflection table, keeps the fully integrated reflections,
# and writes those with partiality below the limit to a new pickle.
refl_in = sys.argv[1]
refl_out = sys.argv[2]
part_lim = float(sys.argv[3])

# Pickles are binary data: open in "rb" and close the file when done.
# NOTE: unpickling can execute arbitrary code - only use trusted input files.
with open(refl_in, "rb") as fh:
    refl = pickle.load(fh)

# remove duff reflections: keep only those flagged as integrated by
# summation and then only those flagged as integrated
sel = refl.get_flags(refl.flags.integrated_sum, all=True)
refl = refl.select(sel)
sel = refl.get_flags(refl.flags.integrated, all=True)
refl = refl.select(sel)

# extract partiality, plot histogram, select subset, dump
part = refl["partiality"]
parth = flex.histogram(part, n_slots=20, data_min=0.0, data_max=1.0)
parth.show()
edge = part < part_lim
edge_refl = refl.select(edge)

with open(refl_out, "wb") as fh:
    pickle.dump(edge_refl, fh)

print("Wrote %d reflections with partiality < %.3f to %s" %
      (len(edge_refl), part_lim, refl_out))
Beispiel #28
0
def run(args):
    """Process every item of ``args`` with ``run_once`` and summarise the results.

    The items are mapped over ``run_once`` with ``easy_mp.parallel_map``;
    results that are ``None`` are ignored. The remaining results are
    accumulated and written to the current working directory:

    * ``results.txt`` - one row per result (sweep directory, template, spot
      counts, lattice count and d-spacing percentiles);
    * ``results_indexed.txt`` - one row per collected ``cell_params`` entry
      (unit cell, number of indexed reflections, d_min and RMSDs);
    * ``spot_count_histogram.pdf``, ``n_indexed_lattices_histogram.pdf``,
      ``n_integrated_lattices_histogram.pdf`` (only when at least one lattice
      was integrated) and ``cell_parameters.pdf``.

    Histograms and cell-parameter statistics are additionally reported
    through their ``show()`` methods / ``print``.

    Args:
        args: iterable of work items, each passed unchanged to ``run_once``.
    """
    # Per-result quantities (one entry per non-None result).
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()

    # Quantities that each result contributes a whole array of (extended,
    # not appended, below), so their length can differ from the lists above.
    sweep_dir_cryst = flex.std_string()
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for result in results:
        if result is None:
            continue
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(
            result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(
            result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(
            result.d_strong_spots_99th_percentile)
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    # ---- Table 1: one row per processed sweep -------------------------------
    table_data = [(
        "sweep_dir",
        "template",
        "#strong_spots",
        "#unindexed_spots",
        "#lattices",
        "d_spacing_50th_percentile",
        "d_spacing_95th_percentile",
        "d_spacing_99th_percentile",
    )]
    for i in range(len(sweep_directories)):
        table_data.append((
            sweep_directories[i],
            templates[i],
            str(n_strong_spots[i]),
            str(n_unindexed_spots[i]),
            str(n_indexed_lattices[i]),
            str(d_strong_spots_50th_percentile[i]),
            str(d_strong_spots_95th_percentile[i]),
            str(d_strong_spots_99th_percentile[i]),
        ))

    # print() writes text, so the file has to be opened in text mode: a
    # binary-mode handle makes print() raise TypeError on Python 3.
    with open("results.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"),
              file=f)

    # ---- Table 2: one row per indexed crystal -------------------------------
    table_data = [(
        "sweep_dir",
        "cell_a",
        "cell_b",
        "cell_c",
        "alpha",
        "beta",
        "gamma",
        "#indexed_reflections",
        "d_min_indexed",
        "rmsd_x",
        "rmsd_y",
        "rmsd_phi",
    )]
    for i in range(len(cell_params)):
        cell = cell_params[i]
        rmsd = rmsds[i]
        row = [sweep_dir_cryst[i]]
        row.extend(str(cell[k]) for k in range(6))
        row.append(str(n_indexed[i]))
        row.append(str(d_min_indexed[i]))
        row.extend(str(rmsd[k]) for k in range(3))
        table_data.append(tuple(row))

    with open("results_indexed.txt", "w") as f:
        print(table_utils.format(table_data, has_header=True, justify="right"),
              file=f)

    # Split the cell parameters into six columns: a, b, c, alpha, beta, gamma.
    cell_columns = [
        flex.double([cell[k] for cell in cell_params]) for k in range(6)
    ]

    # matplotlib is imported only here, once there is something to plot.
    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc("font", family="serif")
    pyplot.rc("font", serif="Times New Roman")

    blue = "#2166AC"

    def save_pdf(figure, filename):
        # The context manager closes the PDF even if savefig() raises.
        with PdfPages(filename) as pdf:
            pdf.savefig(figure)

    def plot_lattice_counts(counts, xlabel, filename):
        # Bar chart of integer lattice counts with one slot per count value;
        # bars are centred on the integers 0 .. max-1.
        count_hist = flex.histogram(counts.as_double(),
                                    n_slots=flex.max(counts))
        count_hist.show()
        count_fig = pyplot.figure()
        count_ax = count_fig.add_subplot(1, 1, 1)
        count_ax.bar(
            range(int(count_hist.data_max())),
            count_hist.slots(),
            width=0.75 * count_hist.slot_width(),
            align="center",
            color=blue,
            edgecolor=blue,
        )
        count_ax.set_xlim(-0.5, count_hist.data_max() - 0.5)
        count_ax.set_xticks(range(0, int(count_hist.data_max())))
        count_ax.set_xlabel(xlabel)
        count_ax.set_ylabel("Frequency")
        save_pdf(count_fig, filename)

    # ---- Histogram of the n_strong_spots_dmin_4 counts ----------------------
    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        hist.slot_centers(),
        hist.slots(),
        width=0.75 * hist.slot_width(),
        color=blue,
        edgecolor=blue,
    )
    ax.set_xlabel("Spot count")
    ax.set_ylabel("Frequency")
    save_pdf(fig, "spot_count_histogram.pdf")

    # ---- Histograms of lattice counts ---------------------------------------
    plot_lattice_counts(n_indexed_lattices, "Number of indexed lattices",
                        "n_indexed_lattices_histogram.pdf")

    if flex.max(n_integrated_lattices) > 0:
        plot_lattice_counts(n_integrated_lattices,
                            "Number of integrated lattices",
                            "n_integrated_lattices_histogram.pdf")

    # ---- 2x3 grid with one histogram per unit cell parameter ----------------
    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    for i, cell_param in enumerate(cell_columns):
        ax = axes.flat[i]
        flex.min_max_mean_double(cell_param).show()
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel("Cell parameter")
        ax.set_ylabel("Frequency")
    pyplot.tight_layout()
    save_pdf(fig, "cell_parameters.pdf")
Beispiel #29
0
def get_histogram(d_star_sq, target_n_per_bin=20, max_slots=20, min_slots=5):
    """Histogram ``d_star_sq`` with a slot count scaled to the amount of data.

    The slot count aims for ``target_n_per_bin`` values per slot, is capped
    at ``max_slots`` and is then raised to at least ``min_slots`` (so
    ``min_slots`` takes precedence should the two limits conflict).

    Returns:
        The ``flex.histogram`` built from ``d_star_sq`` with that slot count.
    """
    wanted_slots = len(d_star_sq) // target_n_per_bin
    # Apply the upper cap first, then the lower bound, in that order.
    slot_count = max(min(wanted_slots, max_slots), min_slots)
    return flex.histogram(d_star_sq, n_slots=slot_count)
Beispiel #30
0
def run_with_preparsed(experiments, reflections, params):
    """Drop experiments whose positional RMSD is an upper outlier.

    For every experiment the RMSD of the ``xyzcal.mm`` - ``xyzobs.mm.value``
    difference-vector norms is computed (scaled by 1000 and reported as
    microns). Experiments with no reflections, or with an RMSD above
    ``q3 + params.iqr_multiplier * IQR``, are removed; surviving experiments
    and their reflections are renumbered consecutively.

    Args:
        experiments: experiment list to filter.
        reflections: reflection table whose ``id`` column indexes
            ``experiments``.
        params: options object; this function reads ``detector``,
            ``max_delta``, ``show_plots``, ``iqr_multiplier`` and
            ``delta_psi_filter``.

    Returns:
        Tuple ``(filtered_experiments, filtered_reflections)``.
    """
    from dxtbx.model import ExperimentList
    from scitbx.math import five_number_summary

    print("Found", len(reflections), "reflections", "and", len(experiments),
          "experiments")

    filtered_reflections = flex.reflection_table()
    filtered_experiments = ExperimentList()

    skipped_reflections = flex.reflection_table()
    skipped_experiments = ExperimentList()

    if params.detector is not None:
        # Restrict the RMSD filtering to experiments using the chosen
        # detector; the others are set aside (renumbered) and may be
        # re-attached after filtering.
        culled_reflections = flex.reflection_table()
        culled_experiments = ExperimentList()
        detector = experiments.detectors()[params.detector]
        for expt_id, experiment in enumerate(experiments):
            refls = reflections.select(reflections['id'] == expt_id)
            if experiment.detector is detector:
                culled_experiments.append(experiment)
                refls['id'] = flex.int(len(refls), len(culled_experiments) - 1)
                culled_reflections.extend(refls)
            else:
                skipped_experiments.append(experiment)
                refls['id'] = flex.int(len(refls),
                                       len(skipped_experiments) - 1)
                skipped_reflections.extend(refls)

        print("RMSD filtering %d experiments using detector %d, out of %d" %
              (len(culled_experiments), params.detector, len(experiments)))
        reflections = culled_reflections
        experiments = culled_experiments

    difference_vector_norms = (reflections['xyzcal.mm'] -
                               reflections['xyzobs.mm.value']).norms()

    if params.max_delta is not None:
        # Discard individual reflections whose residual exceeds max_delta.
        sel = difference_vector_norms <= params.max_delta
        reflections = reflections.select(sel)
        difference_vector_norms = difference_vector_norms.select(sel)

    # Per-experiment RMSD (0 as a placeholder when there are no reflections)
    # and reflection count.
    data = flex.double()
    counts = flex.double()
    for i in range(len(experiments)):
        dvns = difference_vector_norms.select(reflections['id'] == i)
        counts.append(len(dvns))
        if len(dvns) == 0:
            data.append(0)
            continue
        rmsd = math.sqrt(flex.sum_sq(dvns) / len(dvns))
        data.append(rmsd)
    data *= 1000
    # Statistics are computed only over experiments that have reflections.
    subset = data.select(counts > 0)
    print(len(subset), "experiments with > 0 reflections")

    if params.show_plots:
        h = flex.histogram(subset, n_slots=40)
        fig = plt.figure()
        # Integer position spec: the string form '111' is rejected by
        # current matplotlib releases.
        ax = fig.add_subplot(111)
        ax.plot(h.slot_centers().as_numpy_array(),
                h.slots().as_numpy_array(), '-')
        plt.title("Histogram of %d image RMSDs" % len(subset))

        plt.figure()
        plt.boxplot(subset, vert=False)
        plt.title("Boxplot of %d image RMSDs" % len(subset))
        plt.show()

    # Experiments without reflections are always treated as outliers.
    outliers = counts == 0
    min_x, q1_x, med_x, q3_x, max_x = five_number_summary(subset)
    print(
        "Five number summary of RMSDs (microns): min %.1f, q1 %.1f, med %.1f, q3 %.1f, max %.1f"
        % (min_x, q1_x, med_x, q3_x, max_x))
    iqr_x = q3_x - q1_x
    cut_x = params.iqr_multiplier * iqr_x
    # Only the high side is cut: images that are outliers in the 'good'
    # (low RMSD) direction are deliberately kept.
    outliers.set_selected(data > q3_x + cut_x, True)

    for i in range(len(experiments)):
        if outliers[i]:
            continue
        refls = reflections.select(reflections['id'] == i)
        refls['id'] = flex.int(len(refls), len(filtered_experiments))
        filtered_reflections.extend(refls)
        filtered_experiments.append(experiments[i])

    zeroes = counts == 0
    n_zero = len(counts.select(zeroes))
    n_removed = len(experiments) - len(filtered_experiments)
    # float() keeps the percentage correct even where "/" on two ints is
    # integer division (Python 2 without true division).
    print(
        "Removed %d bad experiments and %d experiments with zero reflections, out of %d (%%%.1f)"
        %
        (n_removed - n_zero, n_zero, len(experiments),
         100 * (float(n_removed) / len(experiments))))

    if params.detector is not None:
        # Re-attach set-aside experiments whose crystal survived filtering.
        crystals = filtered_experiments.crystals()
        for expt_id, experiment in enumerate(skipped_experiments):
            if experiment.crystal in crystals:
                filtered_experiments.append(experiment)
                refls = skipped_reflections.select(
                    skipped_reflections['id'] == expt_id)
                refls['id'] = flex.int(len(refls),
                                       len(filtered_experiments) - 1)
                filtered_reflections.extend(refls)

    if params.delta_psi_filter is not None:
        # delpsical.rad is converted from radians to degrees before being
        # compared with the symmetric +/- delta_psi_filter window.
        delta_psi = filtered_reflections['delpsical.rad'] * 180 / math.pi
        sel = (delta_psi <= params.delta_psi_filter) & (
            delta_psi >= -params.delta_psi_filter)
        n_before = len(filtered_reflections)
        filtered_reflections = filtered_reflections.select(sel)
        print("Filtering by delta psi, removing %d out of %d reflections" %
              (n_before - len(filtered_reflections), n_before))

    print("Final experiment count", len(filtered_experiments))
    return filtered_experiments, filtered_reflections
    def run(self):
        """Parse the options, then drop experiments with outlying RMSDs.

        For every experiment the RMSD of the ``xyzcal.mm`` -
        ``xyzobs.mm.value`` difference-vector norms is computed (scaled by
        1000 and reported as microns). Experiments with no reflections, or
        with an RMSD above ``q3 + params.iqr_multiplier * IQR``, are removed.
        The surviving experiments are written as JSON to
        ``params.output.filtered_experiments`` and their renumbered
        reflections are pickled to ``params.output.filtered_reflections``.

        The parsed parameters are also stored on ``self.params``.
        """
        from dials.util.options import flatten_experiments, flatten_reflections
        from dxtbx.model.experiment.experiment_list import (
            ExperimentList, ExperimentListDumper)
        from scitbx.math import five_number_summary
        # Parse the command line arguments
        params, options = self.parser.parse_args(show_diff_phil=True)
        self.params = params
        experiments = flatten_experiments(params.input.experiments)
        reflections = flatten_reflections(params.input.reflections)

        # Exactly one reflection table is expected.
        assert len(reflections) == 1
        reflections = reflections[0]
        # Single pre-formatted strings print identically under both the
        # print statement and the print function.
        print("Found %s reflections and %s experiments" %
              (len(reflections), len(experiments)))

        difference_vector_norms = (reflections['xyzcal.mm'] -
                                   reflections['xyzobs.mm.value']).norms()

        # Per-experiment RMSD (0 as a placeholder when there are no
        # reflections) and reflection count.
        data = flex.double()
        counts = flex.double()
        for i in range(len(experiments)):
            dvns = difference_vector_norms.select(reflections['id'] == i)
            counts.append(len(dvns))
            if len(dvns) == 0:
                data.append(0)
                continue
            rmsd = math.sqrt(flex.sum_sq(dvns) / len(dvns))
            data.append(rmsd)
        data *= 1000
        # Statistics are computed only over experiments with reflections.
        subset = data.select(counts > 0)
        print("%s experiments with > 0 reflections" % len(subset))

        if params.show_plots:
            h = flex.histogram(subset, n_slots=40)
            fig = plt.figure()
            # Integer position spec: the string form '111' is rejected by
            # current matplotlib releases.
            ax = fig.add_subplot(111)
            ax.plot(h.slot_centers().as_numpy_array(),
                    h.slots().as_numpy_array(), '-')
            plt.title("Histogram of %d image RMSDs" % len(subset))

            plt.figure()
            plt.boxplot(subset, vert=False)
            plt.title("Boxplot of %d image RMSDs" % len(subset))
            plt.show()

        # Experiments without reflections are always treated as outliers.
        outliers = counts == 0
        min_x, q1_x, med_x, q3_x, max_x = five_number_summary(subset)
        print(
            "Five number summary of RMSDs (microns): min %.1f, q1 %.1f, med %.1f, q3 %.1f, max %.1f"
            % (min_x, q1_x, med_x, q3_x, max_x))
        iqr_x = q3_x - q1_x
        cut_x = params.iqr_multiplier * iqr_x
        # Only the high side is cut: images that are outliers in the 'good'
        # (low RMSD) direction are deliberately kept.
        outliers.set_selected(data > q3_x + cut_x, True)

        filtered_reflections = flex.reflection_table()
        filtered_experiments = ExperimentList()
        for i in range(len(experiments)):
            if outliers[i]:
                continue
            refls = reflections.select(reflections['id'] == i)
            # Renumber so ids index the filtered experiment list.
            refls['id'] = flex.int(len(refls), len(filtered_experiments))
            filtered_reflections.extend(refls)
            filtered_experiments.append(experiments[i])

        zeroes = counts == 0
        n_zero = len(counts.select(zeroes))
        n_removed = len(experiments) - len(filtered_experiments)
        # float() avoids integer division, which would truncate the removed
        # fraction to 0 (or 1) before it is scaled to a percentage.
        print(
            "Removed %d bad experiments and %d experiments with zero reflections, out of %d (%%%.1f)"
            % (n_removed - n_zero, n_zero, len(experiments),
               100 * (float(n_removed) / len(experiments))))
        dump = ExperimentListDumper(filtered_experiments)
        dump.as_json(params.output.filtered_experiments)

        filtered_reflections.as_pickle(params.output.filtered_reflections)