コード例 #1
0
def test_fast_mcd_large(dials_regression):
    """Check FastMCD against stored reference values on a large data set.

    The residuals are read from ``refinement_test_data/outlier_rejection/
    residuals.dat`` below the ``dials_regression`` directory. The raw and
    corrected location/scatter estimates and the two correction factors are
    then compared with previously recorded numbers.

    Args:
        dials_regression: path to the root of the dials_regression data.
    """
    import os
    import random

    from scitbx.array_family import flex
    from dials.algorithms.statistics.fast_mcd import FastMCD
    from libtbx.test_utils import approx_equal

    # Seed both generators: without fixed seeds the algorithm occasionally
    # lands on a less common solution and the assertions below fail.
    random.seed(42)
    flex.set_random_seed(42)

    residuals_file = os.path.join(
        dials_regression,
        "refinement_test_data",
        "outlier_rejection",
        "residuals.dat",
    )
    with open(residuals_file, "r") as handle:
        lines = handle.readlines()

    # The first line is a header; every other line holds three numbers.
    table = [[float(field) for field in line.split()] for line in lines[1:]]
    x_column, y_column, phi_column = zip(*table)

    # This data set exercises the large dataset branch of the algorithm.
    fast_mcd = FastMCD([
        flex.double(x_column),
        flex.double(y_column),
        flex.double(phi_column),
    ])

    # The location estimate is the same for raw and corrected results.
    expected_T = [
        -0.009702392946856687,
        0.008866136837504363,
        -0.04909037126352747,
    ]
    expected_raw_S = flex.double([
        [0.00527965256891, 0.000864300169087, -0.00145971018701],
        [0.000864300169087, 0.00842807897907, -0.00184047321286],
        [-0.00145971018701, -0.00184047321286, 0.00698461269031],
    ])
    expected_corrected_S = flex.double([
        [0.0129950608638, 0.00212734325892, -0.00359285435473],
        [0.00212734325892, 0.0207444330604, -0.00453004456394],
        [-0.00359285435473, -0.00453004456394, 0.0171915605878],
    ])

    # Raw estimates
    raw_T, raw_S = fast_mcd.get_raw_T_and_S()
    assert approx_equal(raw_T, expected_T)
    assert approx_equal(raw_S, expected_raw_S)

    # Corrected estimates
    corrected_T, corrected_S = fast_mcd.get_corrected_T_and_S()
    assert approx_equal(corrected_T, expected_T)
    assert approx_equal(corrected_S, expected_corrected_S)

    # Correction factors
    assert approx_equal(fast_mcd._consistency_fac, 2.45659976388)
    assert approx_equal(fast_mcd._finite_samp_fac, 1.00193273884)
コード例 #2
0
ファイル: mcd.py プロジェクト: rjgildea/dials
    def _detect_outliers(self, cols):
        """Flag the observations in ``cols`` that are MCD outliers.

        A FastMCD estimator is built from ``cols`` using the tuning values
        stored on this object. The squared Mahalanobis distance of every
        observation from the corrected location/scatter estimate is then
        compared with ``self._mahasq_cutoff``.

        Args:
            cols: the data columns handed to ``FastMCD`` and ``maha_dist_sq``.

        Returns:
            The result of ``d2s > self._mahasq_cutoff`` (presumably one
            boolean flag per observation, true for outliers -- confirm
            against ``maha_dist_sq``).
        """
        settings = dict(
            alpha=self._alpha,
            max_n_groups=self._max_n_groups,
            min_group_size=self._min_group_size,
            n_trials=self._n_trials,
            k1=self._k1,
            k2=self._k2,
            k3=self._k3,
        )
        estimator = FastMCD(cols, **settings)

        # Corrected location and scatter estimates
        location, scatter = estimator.get_corrected_T_and_S()

        # Anything beyond the cutoff in squared Mahalanobis distance is flagged
        return maha_dist_sq(cols, location, scatter) > self._mahasq_cutoff
コード例 #3
0
ファイル: mcd.py プロジェクト: biochem-fan/dials
  def _detect_outliers(self, cols):
    """Flag the observations in ``cols`` that are MCD outliers.

    A FastMCD estimator is built from ``cols`` using the tuning values
    stored on this object. The squared Mahalanobis distance of every
    observation from the corrected location/scatter estimate is then
    compared with ``self._mahasq_cutoff``.

    Args:
      cols: the data columns handed to ``FastMCD`` and ``maha_dist_sq``.

    Returns:
      The result of ``d2s > self._mahasq_cutoff`` (presumably one boolean
      flag per observation, true for outliers -- confirm against
      ``maha_dist_sq``).
    """

    # NOTE: no placeholder result array is allocated up front; the
    # comparison below creates the result directly.
    fast_mcd = FastMCD(cols,
                       alpha=self._alpha,
                       max_n_groups=self._max_n_groups,
                       min_group_size=self._min_group_size,
                       n_trials=self._n_trials,
                       k1=self._k1,
                       k2=self._k2,
                       k3=self._k3)

    # get location and MCD scatter estimate
    T, S = fast_mcd.get_corrected_T_and_S()

    # get squared Mahalanobis distances
    d2s = maha_dist_sq(cols, T, S)

    # compare to the threshold
    return d2s > self._mahasq_cutoff
コード例 #4
0
ファイル: test_fast_mcd.py プロジェクト: kek-pf-mx/dials
def test_fast_mcd_small():
    """Check FastMCD against stored reference values on a small data set.

    The three-column data set is embedded in the test. The raw and corrected
    location/scatter estimates and the two correction factors are compared
    with previously recorded numbers.
    """
    import random

    from scitbx.array_family import flex
    from dials.algorithms.statistics.fast_mcd import FastMCD
    from libtbx.test_utils import approx_equal

    # Seed both generators: without fixed seeds the algorithm occasionally
    # lands on a less common solution and the assertions below fail.
    random.seed(42)
    flex.set_random_seed(42)

    # Test data from the R package robustbase: Hawkins, Bradu, Kass's
    # Artificial Data. One observation per line, three values each.
    hbk_text = """10.1 19.6 28.3
   9.5 20.5 28.9
  10.7 20.2 31.0
   9.9 21.5 31.7
  10.3 21.1 31.1
  10.8 20.4 29.2
  10.5 20.9 29.1
   9.9 19.6 28.8
   9.7 20.7 31.0
   9.3 19.7 30.3
  11.0 24.0 35.0
  12.0 23.0 37.0
  12.0 26.0 34.0
  11.0 34.0 34.0
   3.4  2.9  2.1
   3.1  2.2  0.3
   0.0  1.6  0.2
   2.3  1.6  2.0
   0.8  2.9  1.6
   3.1  3.4  2.2
   2.6  2.2  1.9
   0.4  3.2  1.9
   2.0  2.3  0.8
   1.3  2.3  0.5
   1.0  0.0  0.4
   0.9  3.3  2.5
   3.3  2.5  2.9
   1.8  0.8  2.0
   1.2  0.9  0.8
   1.2  0.7  3.4
   3.1  1.4  1.0
   0.5  2.4  0.3
   1.5  3.1  1.5
   0.4  0.0  0.7
   3.1  2.4  3.0
   1.1  2.2  2.7
   0.1  3.0  2.6
   1.5  1.2  0.2
   2.1  0.0  1.2
   0.5  2.0  1.2
   3.4  1.6  2.9
   0.3  1.0  2.7
   0.1  3.3  0.9
   1.8  0.5  3.2
   1.9  0.1  0.6
   1.8  0.5  3.0
   3.0  0.1  0.8
   3.1  1.6  3.0
   3.1  2.5  1.9
   2.1  2.8  2.9
   2.3  1.5  0.4
   3.3  0.6  1.2
   0.3  0.4  3.3
   1.1  3.0  0.3
   0.5  2.4  0.9
   1.8  3.2  0.9
   1.8  0.7  0.7
   2.4  3.4  1.5
   1.6  2.1  3.0
   0.3  1.5  3.3
   0.4  3.4  3.0
   0.9  0.1  0.3
   1.1  2.7  0.2
   2.8  3.0  2.9
   2.0  0.7  2.7
   0.2  1.8  0.8
   1.6  2.0  1.2
   0.1  0.0  1.1
   2.0  0.6  0.3
   1.0  2.2  2.9
   2.2  2.5  2.3
   0.6  2.0  1.5
   0.3  1.7  2.2
   0.0  2.2  1.6
   0.3  0.4  2.6"""

    # Parse every line into floats, then transpose rows into three columns.
    observations = [
        [float(token) for token in line.split()]
        for line in hbk_text.splitlines()
    ]
    x1, x2, x3 = (flex.double(column) for column in zip(*observations))

    fast_mcd = FastMCD([x1, x2, x3])

    # The location estimate is the same for raw and corrected results.
    expected_T = [1.5333333333333334, 2.4564102564102566, 1.6076923076923078]
    expected_raw_S = flex.double([
        [1.18964912281, 0.00464912280702, 0.217368421053],
        [0.00464912280702, 0.37620782726, 0.182186234818],
        [0.217368421053, 0.182186234818, 0.910728744939],
    ])
    expected_corrected_S = flex.double([
        [3.17735853174, 0.012417047794, 0.58055555535],
        [0.01241704779, 1.00478967011, 0.486589681332],
        [0.58055555535, 0.486589681332, 2.43240775146],
    ])

    # Raw estimates
    raw_T, raw_S = fast_mcd.get_raw_T_and_S()
    assert approx_equal(raw_T, expected_T)
    assert approx_equal(raw_S, expected_raw_S)

    # Corrected estimates
    corrected_T, corrected_S = fast_mcd.get_corrected_T_and_S()
    assert approx_equal(corrected_T, expected_T)
    assert approx_equal(corrected_S, expected_corrected_S)

    # Correction factors
    assert approx_equal(fast_mcd._consistency_fac, 2.36792847084)
    assert approx_equal(fast_mcd._finite_samp_fac, 1.12792118859)
コード例 #5
0
    def _filter_reflections_based_on_centroid_distance(self):
        """
        Discard reflections whose observed centroid is too far from the
        predicted position.

        An MCD location and scatter estimate is computed from the X and Y
        pixel residuals. A reflection is kept only when its squared
        Mahalanobis distance is below the chi-squared quantile (2 degrees of
        freedom) at ``params.refinement.outlier_probability`` and the length
        of its X/Y residual is below ``params.refinement.max_separation``.
        ``self.reflections`` is replaced by the selected subset and a summary
        is written to the log.

        Raises:
            RuntimeError: if fewer than
                ``params.refinement.min_n_reflections`` reflections remain.
        """

        def rms(values):
            # Square root of the mean squared residual
            return sqrt(flex.sum(values**2) / len(values))

        # X and Y residuals between observed and calculated pixel positions
        x_obs, y_obs, _ = self.reflections["xyzobs.px.value"].parts()
        x_cal, y_cal, _ = self.reflections["xyzcal.px"].parts()
        Xres = x_obs - x_cal
        Yres = y_obs - y_cal

        # Epsilon residual: length of the "s2" vector minus 1/wavelength of
        # the first experiment's beam.
        inv_wavelength = 1.0 / self.experiments[0].beam.get_wavelength()
        sx, sy, sz = self.reflections["s2"].parts()
        Eres = flex.sqrt(sx**2 + sy**2 + sz**2) - inv_wavelength

        # NOTE: only the X and Y residuals enter the MCD estimate; Eres is
        # reported in the log but plays no part in the rejection.
        xy_residuals = (Xres, Yres)
        estimator = FastMCD(xy_residuals)

        # Corrected location and scatter, then squared Mahalanobis distances
        T, S = estimator.get_corrected_T_and_S()
        d2s = maha_dist_sq(xy_residuals, T, S)

        # Cutoff for two degrees of freedom
        refinement = self.params.refinement
        mahasq_cutoff = chisq_quantile(2, refinement.outlier_probability)

        # Keep reflections that pass both the MCD and the separation test
        within_cutoff = d2s < mahasq_cutoff
        within_separation = (flex.sqrt(Xres**2 + Yres**2) <
                             refinement.max_separation)
        self.reflections = self.reflections.select(
            within_cutoff & within_separation)

        # Report what happened
        divider = "-" * 80
        logger.info(divider)
        logger.info("Centroid outlier rejection")
        logger.info(" Using MCD algorithm with probability = %f" %
                    refinement.outlier_probability)
        logger.info(" Max X residual: %f" % flex.max(flex.abs(Xres)))
        logger.info(" Max Y residual: %f" % flex.max(flex.abs(Yres)))
        logger.info(" Max E residual: %f" % flex.max(flex.abs(Eres)))
        logger.info(" Mean X RMSD: %f" % rms(Xres))
        logger.info(" Mean Y RMSD: %f" % rms(Yres))
        logger.info(" Mean E RMSD: %f" % rms(Eres))
        logger.info(" MCD location estimate: %.4f, %.4f" % tuple(T))
        logger.info(
            " MCD scatter estimate:\n      %.7f, %.7f,\n      %.7f, %.7f"
            % tuple(S))
        logger.info(" Number of outliers: %d" % within_cutoff.count(False))
        logger.info(
            " Number of reflections with residual > %0.2f pixels: %d" %
            (refinement.max_separation, within_separation.count(False)))
        logger.info(" Number of reflections selection for refinement: %d" %
                    len(self.reflections))
        logger.info(divider)

        # Refinement cannot proceed with too few reflections
        n_remaining = len(self.reflections)
        if n_remaining < refinement.min_n_reflections:
            raise RuntimeError(
                "Too few reflections to perform refinement: got %d, expected %d"
                % (n_remaining, refinement.min_n_reflections))
コード例 #6
0
ファイル: tst_fast_mcd.py プロジェクト: biochem-fan/dials
def test_fast_mcd_small():
  """Check FastMCD against stored reference values on a small data set.

  The three-column data set is embedded in the test. The raw and corrected
  location/scatter estimates and the two correction factors are compared
  with previously recorded numbers. Prints "OK" when every check passes.
  """

  from scitbx.array_family import flex
  from dials.algorithms.statistics.fast_mcd import FastMCD

  # set random seeds to try to avoid assertion errors due to occasionally
  # finding less common solutions
  import random
  random.seed(42)
  flex.set_random_seed(42)

  # some test data, from R package robustbase: Hawkins, Bradu, Kass's Artificial Data
  hbk = """10.1 19.6 28.3
   9.5 20.5 28.9
  10.7 20.2 31.0
   9.9 21.5 31.7
  10.3 21.1 31.1
  10.8 20.4 29.2
  10.5 20.9 29.1
   9.9 19.6 28.8
   9.7 20.7 31.0
   9.3 19.7 30.3
  11.0 24.0 35.0
  12.0 23.0 37.0
  12.0 26.0 34.0
  11.0 34.0 34.0
   3.4  2.9  2.1
   3.1  2.2  0.3
   0.0  1.6  0.2
   2.3  1.6  2.0
   0.8  2.9  1.6
   3.1  3.4  2.2
   2.6  2.2  1.9
   0.4  3.2  1.9
   2.0  2.3  0.8
   1.3  2.3  0.5
   1.0  0.0  0.4
   0.9  3.3  2.5
   3.3  2.5  2.9
   1.8  0.8  2.0
   1.2  0.9  0.8
   1.2  0.7  3.4
   3.1  1.4  1.0
   0.5  2.4  0.3
   1.5  3.1  1.5
   0.4  0.0  0.7
   3.1  2.4  3.0
   1.1  2.2  2.7
   0.1  3.0  2.6
   1.5  1.2  0.2
   2.1  0.0  1.2
   0.5  2.0  1.2
   3.4  1.6  2.9
   0.3  1.0  2.7
   0.1  3.3  0.9
   1.8  0.5  3.2
   1.9  0.1  0.6
   1.8  0.5  3.0
   3.0  0.1  0.8
   3.1  1.6  3.0
   3.1  2.5  1.9
   2.1  2.8  2.9
   2.3  1.5  0.4
   3.3  0.6  1.2
   0.3  0.4  3.3
   1.1  3.0  0.3
   0.5  2.4  0.9
   1.8  3.2  0.9
   1.8  0.7  0.7
   2.4  3.4  1.5
   1.6  2.1  3.0
   0.3  1.5  3.3
   0.4  3.4  3.0
   0.9  0.1  0.3
   1.1  2.7  0.2
   2.8  3.0  2.9
   2.0  0.7  2.7
   0.2  1.8  0.8
   1.6  2.0  1.2
   0.1  0.0  1.1
   2.0  0.6  0.3
   1.0  2.2  2.9
   2.2  2.5  2.3
   0.6  2.0  1.5
   0.3  1.7  2.2
   0.0  2.2  1.6
   0.3  0.4  2.6"""

  # unpack the data into vectors: parse each line into floats, then
  # transpose the rows into three columns
  rows = [[float(e) for e in row.split()] for row in hbk.splitlines()]
  x1, x2, x3 = [flex.double(e) for e in zip(*rows)]

  # Fast MCD raw estimates
  fast_mcd = FastMCD([x1, x2, x3])
  T, S = fast_mcd.get_raw_T_and_S()
  from libtbx.test_utils import approx_equal
  assert approx_equal(T,
    [1.5333333333333334, 2.4564102564102566, 1.6076923076923078])
  assert approx_equal(S, flex.double(
    [[1.18964912281, 0.00464912280702, 0.217368421053],
     [0.00464912280702, 0.37620782726, 0.182186234818],
     [0.217368421053, 0.182186234818, 0.910728744939]]))

  # Fast MCD corrected estimates
  T, S = fast_mcd.get_corrected_T_and_S()
  assert approx_equal(T,
    [1.5333333333333334, 2.4564102564102566, 1.6076923076923078])
  assert approx_equal(S, flex.double(
    [[3.17735853174, 0.012417047794, 0.58055555535],
     [0.01241704779, 1.00478967011, 0.486589681332],
     [0.58055555535, 0.486589681332, 2.43240775146]]))

  # Correction factors
  assert approx_equal(fast_mcd._consistency_fac, 2.36792847084)
  assert approx_equal(fast_mcd._finite_samp_fac, 1.12792118859)

  # Function-call form works on both Python 2 and Python 3
  print("OK")
コード例 #7
0
ファイル: tst_fast_mcd.py プロジェクト: biochem-fan/dials
def test_fast_mcd_large():
  """Check FastMCD against stored reference values on a large data set.

  The residuals are read from ``refinement_test_data/outlier_rejection/
  residuals.dat`` inside the dials_regression repository. When that module
  is not available the test prints a message and returns without checking
  anything. Otherwise the raw and corrected location/scatter estimates and
  the two correction factors are compared with previously recorded numbers,
  and "OK" is printed when every check passes.
  """

  from scitbx.array_family import flex
  from dials.algorithms.statistics.fast_mcd import FastMCD

  # set random seeds to try to avoid assertion errors due to occasionally
  # finding less common solutions
  import random
  random.seed(42)
  flex.set_random_seed(42)

  # test large dataset algorithm
  import libtbx.load_env # required for libtbx.env.find_in_repositories
  if not libtbx.env.has_module("dials_regression"):
    # Function-call form works on both Python 2 and Python 3
    print("Skipping test_fast_mcd_large(): dials_regression not available.")
    return

  # load data
  import os
  dials_regression = libtbx.env.find_in_repositories(
      relative_path="dials_regression",
      test=os.path.isdir)
  data_pth = os.path.join(dials_regression, "refinement_test_data",
    "outlier_rejection", "residuals.dat")

  with open(data_pth, "r") as f:
    residuals = f.readlines()

  # ignore first line, which is a header
  residuals = [[float(val) for val in e.split()] for e in residuals[1:]]
  X_resid_mm, Y_resid_mm, Phi_resid_mm = zip(*residuals)

  X_resid_mm = flex.double(X_resid_mm)
  Y_resid_mm = flex.double(Y_resid_mm)
  Phi_resid_mm = flex.double(Phi_resid_mm)

  # Fast MCD raw estimates
  fast_mcd = FastMCD([X_resid_mm, Y_resid_mm, Phi_resid_mm])
  T, S = fast_mcd.get_raw_T_and_S()
  from libtbx.test_utils import approx_equal
  assert approx_equal(T,
    [-0.009702392946856687, 0.008866136837504363, -0.04909037126352747])
  assert approx_equal(S, flex.double(
    [[0.00527965256891, 0.000864300169087, -0.00145971018701],
     [0.000864300169087, 0.00842807897907, -0.00184047321286],
     [-0.00145971018701, -0.00184047321286, 0.00698461269031]]))

  # Fast MCD corrected estimates
  T, S = fast_mcd.get_corrected_T_and_S()
  assert approx_equal(T,
    [-0.009702392946856687, 0.008866136837504363, -0.04909037126352747])
  assert approx_equal(S, flex.double(
    [[0.0129950608638, 0.00212734325892, -0.00359285435473],
     [0.00212734325892, 0.0207444330604, -0.00453004456394],
     [-0.00359285435473, -0.00453004456394, 0.0171915605878]]))

  # Correction factors
  assert approx_equal(fast_mcd._consistency_fac, 2.45659976388)
  assert approx_equal(fast_mcd._finite_samp_fac, 1.00193273884)

  print("OK")
コード例 #8
0
def _filter_reflections_based_on_centroid_distance(
    reflection_table,
    experiment,
    outlier_probability=0.975,
    max_separation=2,
):
    """
    Discard reflections whose observed centroid is too far from the
    predicted position.

    An MCD location and scatter estimate is computed from the X and Y pixel
    residuals. A reflection is kept only when its squared Mahalanobis
    distance is below the chi-squared quantile (2 degrees of freedom) at
    ``outlier_probability`` and the length of its X/Y residual is below
    ``max_separation``. A summary is written to the log.

    Args:
        reflection_table: table providing the "xyzobs.px.value", "xyzcal.px"
            and "s2" columns.
        experiment: object whose ``beam.get_wavelength()`` is used for the
            epsilon residual.
        outlier_probability: probability passed to ``chisq_quantile``.
        max_separation: upper limit on the X/Y residual length (reported in
            the log as pixels).

    Returns:
        The selected subset of ``reflection_table``.
    """

    def rms(values):
        # Square root of the mean squared residual
        return sqrt(flex.sum(values**2) / len(values))

    # X and Y residuals between observed and calculated pixel positions
    x_obs, y_obs, _ = reflection_table["xyzobs.px.value"].parts()
    x_cal, y_cal, _ = reflection_table["xyzcal.px"].parts()
    Xres = x_obs - x_cal
    Yres = y_obs - y_cal

    # Epsilon residual: length of the "s2" vector minus 1/wavelength
    inv_wavelength = 1.0 / experiment.beam.get_wavelength()
    sx, sy, sz = reflection_table["s2"].parts()
    Eres = flex.sqrt(sx**2 + sy**2 + sz**2) - inv_wavelength

    # NOTE: only the X and Y residuals enter the MCD estimate; Eres is
    # reported in the log but plays no part in the rejection.
    xy_residuals = (Xres, Yres)
    estimator = FastMCD(xy_residuals)

    # Corrected location and scatter, then squared Mahalanobis distances
    T, S = estimator.get_corrected_T_and_S()
    d2s = maha_dist_sq(xy_residuals, T, S)

    # Cutoff for two degrees of freedom
    mahasq_cutoff = chisq_quantile(2, outlier_probability)

    # Keep reflections that pass both the MCD and the separation test
    within_cutoff = d2s < mahasq_cutoff
    within_separation = flex.sqrt(Xres**2 + Yres**2) < max_separation
    reflection_table = reflection_table.select(
        within_cutoff & within_separation)
    n_refl = reflection_table.size()

    # Report what happened
    divider = "-" * 80
    logger.info(divider)
    logger.info("Centroid outlier rejection")
    logger.info(
        f" Using MCD algorithm with probability = {outlier_probability}")
    logger.info(" Max X residual: %f" % flex.max(flex.abs(Xres)))
    logger.info(" Max Y residual: %f" % flex.max(flex.abs(Yres)))
    logger.info(" Max E residual: %f" % flex.max(flex.abs(Eres)))
    logger.info(" Mean X RMSD: %f" % rms(Xres))
    logger.info(" Mean Y RMSD: %f" % rms(Yres))
    logger.info(" Mean E RMSD: %f" % rms(Eres))
    logger.info(" MCD location estimate: %.4f, %.4f" % tuple(T))
    logger.info(
        " MCD scatter estimate:\n    %.7f, %.7f,\n    %.7f, %.7f" % tuple(S))
    logger.info(" Number of outliers: %d" % within_cutoff.count(False))
    logger.info(" Number of reflections with residual > %0.2f pixels: %d" %
                (max_separation, within_separation.count(False)))
    logger.info(f"Number of reflections selection for refinement: {n_refl}")
    logger.info(divider)

    return reflection_table