def testNormConst2D(self, dtype):
     expected = 2.
     # 2x2 correlation matrices are determined by one number between -1
     # and 1, so the volume of density 1 over all of them is 2.
     answer = self.evaluate(
         tfd.LKJ(2, dtype([1.]), validate_args=True)._log_normalization())
     self.assertAllClose(answer, np.log([expected]))
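# A standalone cross-check of the constant above (an added sketch, not part
# of the original suite): a 2x2 correlation matrix [[1, r], [r, 1]] is valid
# for every r in [-1, 1] (its determinant 1 - r**2 is never negative), so
# the volume is just the length of that interval.
import numpy as np
r = np.random.RandomState(0).uniform(-1., 1., 100000)
print(2. * np.mean(1. - r**2 >= 0.))  # 2.0: every r in [-1, 1] is valid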
 def testMeanHigherDimension(self, dtype):
     testee_lkj = tfd.LKJ(dimension=6,
                          concentration=dtype([1., 3., 5.]),
                          validate_args=True)
     num_samples = 20000
     results = testee_lkj.sample(sample_shape=[num_samples],
                                 seed=test_util.test_seed())
     mean = testee_lkj.mean()
     self.assertEqual(mean.shape, [3, 6, 6])
      # tfd.LKJ has some small numerical issues, so we allow a small
      # numerical tolerance when testing means.
     numerical_tolerance = 1e-5
     check1 = st.assert_true_mean_in_interval_by_dkwm(
         samples=results,
         low=-1.,
         high=1.,
         expected_low=mean - numerical_tolerance,
         expected_high=mean + numerical_tolerance,
         false_fail_rate=1e-6)
     check2 = assert_util.assert_less(
         st.min_discrepancy_of_true_means_detectable_by_dkwm(
             num_samples,
             low=-1.,
             high=1.,
             # Smaller false fail rate because of different batch sizes between
             # these two checks.
             false_fail_rate=1e-7,
             false_pass_rate=1e-6),
         # 4% relative error
         0.08)
     self.evaluate([check1, check2])
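# Back-of-envelope for the 0.08 bound in check2 above (an added sketch, not
# part of the original suite; it assumes the textbook DKWM envelope
# eps(n, delta) = sqrt(log(1 / delta) / (2 * n)) -- the exact constants in
# `st.min_discrepancy_of_true_means_detectable_by_dkwm` may differ slightly):
import numpy as np
n = 20000.
eps = lambda delta: np.sqrt(np.log(1. / delta) / (2. * n))
# Means over [-1, 1] live on an interval of width 2, and the false fail and
# false pass rates each contribute one envelope:
print(2. * (eps(1e-7) + eps(1e-6)))  # ~0.077, comfortably below 0.08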
 def testValidateConcentration(self, dtype):
   dimension = 3
   concentration = tf.Variable(0.5, dtype=dtype)
   d = tfd.LKJ(dimension, concentration, validate_args=True)
   with self.assertRaisesOpError('Argument `concentration` must be >= 1.'):
     self.evaluate([v.initializer for v in d.variables])
     self.evaluate(d.sample(seed=test_util.test_seed()))
 def testOneDimension(self, dtype):
     testee_lkj = tfd.LKJ(dimension=1,
                          concentration=dtype([1., 4.]),
                          validate_args=True)
     results = testee_lkj.sample(sample_shape=[4, 3],
                                 seed=test_util.test_seed())
     self.assertEqual(results.shape, [4, 3, 2, 1, 1])
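      # Annotation (added): the result shape is sample_shape + batch_shape +
      # event_shape = [4, 3] + [2] + [1, 1].  The two concentrations form the
      # batch, and a dimension-1 correlation matrix is the 1x1 matrix [[1.]],
      # the only valid value.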
 def testMean(self, dtype):
     testee_lkj = tfd.LKJ(dimension=3,
                          concentration=dtype([1., 3., 5.]),
                          validate_args=True)
     num_samples = 20000
      results = testee_lkj.sample(sample_shape=[num_samples],
                                  seed=test_util.test_seed())
     mean = testee_lkj.mean()
     self.assertEqual(mean.shape, [3, 3, 3])
     check1 = st.assert_true_mean_equal_by_dkwm(samples=results,
                                                low=-1.,
                                                high=1.,
                                                expected=mean,
                                                false_fail_rate=1e-6)
     check2 = assert_util.assert_less(
         st.min_discrepancy_of_true_means_detectable_by_dkwm(
             num_samples,
             low=-1.,
             high=1.,
             # Smaller false fail rate because of different batch sizes between
             # these two checks.
             false_fail_rate=1e-7,
             false_pass_rate=1e-6),
         # 4% relative error
         0.08)
     self.evaluate([check1, check2])
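      # Annotation (added): the LKJ density depends on a correlation matrix
      # only through its determinant, which is unchanged by conjugating with
      # a diagonal +/-1 matrix (flipping the sign of one variable's
      # correlations).  Off-diagonal entries are therefore symmetric about 0,
      # so `mean` here is a batch of identity matrices.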
 def testValidateConcentrationAfterMutation(self, dtype):
   dimension = 3
   concentration = tf.Variable(1.5, dtype=dtype)
   d = tfd.LKJ(dimension, concentration, validate_args=True)
   self.evaluate([v.initializer for v in d.variables])
   with self.assertRaisesOpError('Argument `concentration` must be >= 1.'):
     with tf.control_dependencies([concentration.assign(0.5)]):
       self.evaluate(d.mean())
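    # Annotation (added): because `concentration` is a tf.Variable, the
    # validate_args assertions re-read its value on every method call, so the
    # assignment of 0.5 is caught at the subsequent mean() even though the
    # distribution was constructed with a valid value.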
 def testDimensionGuardDynamicShape(self):
   if tf.executing_eagerly():
     return
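    # Annotation (added): in eager mode shapes are always static, so this
    # mismatch surfaces immediately as a ValueError (covered by
    # testDimensionGuard below); the op-error path tested here only exists in
    # graph mode, where the placeholder hides the event shape until run time.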
   testee_lkj = tfd.LKJ(
       dimension=3, concentration=[1., 4.], validate_args=True)
   with self.assertRaisesOpError('dimension mismatch'):
     self.evaluate(
         testee_lkj.log_prob(
             tf1.placeholder_with_default(tf.eye(4), shape=None)))
 def testAssertValidCorrelationMatrix(self, dtype):
   lkj = tfd.LKJ(
       dimension=2, concentration=dtype([1., 4.]), validate_args=True)
   with self.assertRaisesOpError('Correlations must be >= -1.'):
     self.evaluate(lkj.log_prob(dtype([[1., -1.3], [-1.3, 1.]])))
   with self.assertRaisesOpError('Correlations must be <= 1.'):
     self.evaluate(lkj.log_prob(dtype([[1., 1.3], [1.3, 1.]])))
   with self.assertRaisesOpError('Self-correlations must be = 1.'):
     self.evaluate(lkj.log_prob(dtype([[0.5, 0.5], [0.5, 1.]])))
   with self.assertRaisesOpError('Correlation matrices must be symmetric.'):
     self.evaluate(lkj.log_prob(dtype([[1., 0.2], [0.3, 1.]])))
 def testNormConst3D(self, dtype):
     expected = np.pi**2 / 2.
     # 3x3 correlation matrices are determined by the three
     # lower-triangular entries.  In addition to being between -1 and
     # 1, they must also obey the constraint that the determinant of
     # the resulting symmetric matrix is non-negative.  The post
     # https://psychometroscar.com/the-volume-of-a-3-x-3-correlation-matrix/
     # derives (with elementary calculus) that the volume of this set
     # (with respect to Lebesgue^3 measure) is pi^2/2.  The same result
     # is also obtained by Rousseeuw, P. J., & Molenberghs,
     # G. (1994). "The shape of correlation matrices." The American
     # Statistician, 48(4), 276-279.
      answer = self.evaluate(
          tfd.LKJ(3, dtype([1.]), validate_args=True)._log_normalization())
     self.assertAllClose(answer, np.log([expected]))
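# A standalone Monte Carlo cross-check of pi^2 / 2 (an added sketch, not part
# of the original suite): draw the three lower-triangular entries uniformly
# from [-1, 1]^3 and accept when the symmetric matrix is positive
# semidefinite, which for these matrices reduces to a non-negative
# determinant; the volume is then 8 times the acceptance rate.
import numpy as np
rng = np.random.RandomState(0)
r = rng.uniform(-1., 1., size=(200000, 3))
dets = (1. + 2. * r[:, 0] * r[:, 1] * r[:, 2]
        - r[:, 0]**2 - r[:, 1]**2 - r[:, 2]**2)
print(8. * np.mean(dets >= 0.))  # ~4.93, vs. pi^2 / 2 ~= 4.9348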
    def _testSampleLogProbExact(self,
                                concentrations,
                                det_bounds,
                                dim,
                                means,
                                num_samples=int(1e5),
                                dtype=np.float32,
                                target_discrepancy=0.1,
                                input_output_cholesky=False,
                                seed=42):
        # For test methodology see the comment in
        # _testSampleConsistentLogProbInterval, except that this test
        # checks those parameter settings where the true volume is known
        # analytically.
        concentration = np.array(concentrations, dtype=dtype)
        det_bounds = np.array(det_bounds, dtype=dtype)
        means = np.array(means, dtype=dtype)
        # Add a tolerance to guard against some of the importance_weights exceeding
        # the theoretical maximum (importance_maxima) due to numerical inaccuracies
        # while lower bounding the determinant. See corresponding comment in
        # _testSampleConsistentLogProbInterval.
        high_tolerance = 1e-6

        testee_lkj = tfd.LKJ(dimension=dim,
                             concentration=concentration,
                             input_output_cholesky=input_output_cholesky,
                             validate_args=True)
        x = testee_lkj.sample(num_samples, seed=seed)
        importance_weights = (
            tf.exp(-testee_lkj.log_prob(x)) *
            _det_ok_mask(x, det_bounds, input_output_cholesky))
        importance_maxima = (1. / det_bounds)**(concentration - 1) * tf.exp(
            testee_lkj._log_normalization())
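        # Annotation (added): for x ~ LKJ, E[mask(x) / lkj_prob(x)] is the
        # Lebesgue volume of {x : det(x) >= det_bound}, so the sample mean of
        # `importance_weights` should match the analytically known volumes
        # supplied in `means`.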

        chk1 = st.assert_true_mean_equal_by_dkwm(
            importance_weights,
            low=0.,
            high=importance_maxima + high_tolerance,
            expected=means,
            false_fail_rate=1e-6)
        chk2 = assert_util.assert_less(
            st.min_discrepancy_of_true_means_detectable_by_dkwm(
                num_samples,
                low=0.,
                high=importance_maxima + high_tolerance,
                false_fail_rate=1e-6,
                false_pass_rate=1e-6), dtype(target_discrepancy))
        self.evaluate([chk1, chk2])
 def testDefaultEventSpaceBijectorValidCorrelation(self, dtype):
     d = tfd.LKJ(3, tf.constant(1., dtype), validate_args=True)
     b = d.experimental_default_event_space_bijector()
     sample = b(tf.zeros((3, 3), dtype))
     self.evaluate(d.log_prob(sample))
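      # Annotation (added): the default event-space bijector maps
      # unconstrained real tensors into the support of the distribution, so
      # pushing a zero tensor through it must yield a valid correlation
      # matrix that log_prob accepts without tripping validate_args.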
 def testDimensionGuard(self, dtype):
     testee_lkj = tfd.LKJ(dimension=3,
                          concentration=dtype([1., 4.]),
                          validate_args=True)
      with self.assertRaisesRegex(ValueError, 'dimension mismatch'):
         testee_lkj.log_prob(dtype(np.eye(4)))
    def _testSampleConsistentLogProbInterval(self,
                                             concentrations,
                                             det_bounds,
                                             dim,
                                             num_samples=int(1e5),
                                             dtype=np.float32,
                                             input_output_cholesky=False,
                                             false_fail_rate=1e-6,
                                             target_discrepancy=0.1,
                                             seed=42):
        # Consider the set M of dim x dim correlation matrices whose
        # determinant exceeds some bound (rationale for bound forthwith).
        # - This is a (convex!) shape in dim * (dim - 1) / 2 dimensions
        #   (because a correlation matrix is determined by its lower
        #   triangle, and the main diagonal is all 1s).
        # - Further, M is contained entirely in the [-1,1] cube,
        #   because no correlation can fall outside that interval.
        #
        # We have two different ways to estimate the volume of M:
        # - Importance sampling from the LKJ distribution
        # - Importance sampling from the uniform distribution on the cube
        #
        # This test checks that these two methods agree.  However, because
        # the uniform proposal leads to many rejections (thus slowness),
        # those volumes are computed offline and the confidence intervals
        # are presented to this test procedure in the "volume_bounds"
        # table.
        #
        # Why place a lower bound on the determinant?  Because for eta > 1,
        # the density of LKJ approaches 0 as the determinant approaches 0.
        # However, the test methodology requires an upper bound on the
        # importance weights produced.  Rejecting matrices with too-small
        # determinant (from both methods) allows me to supply that bound.
        #
        # I considered several alternative regions whose volume I might
        # know analytically (without having to do rejection).
        # - Option a: Some hypersphere guaranteed to be contained inside M.
        #   - Con: I don't know a priori how to find a radius for it.
        #   - Con: I still need a lower bound on the determinants that appear
        #     in this sphere, and I don't know how to compute it.
        # - Option b: Some trapezoid given as the convex hull of the
        #   nearly-extreme correlation matrices (i.e., those that partition
        #   the variables into two strongly anti-correlated groups).
        #   - Con: Would have to dig up n-d convex hull code to implement this.
        #   - Con: Need to compute the volume of that convex hull.
        #   - Con: Need a bound on the determinants of the matrices in that hull.
        # - Option c: Same thing, but with the matrices that make a single pair
        #   of variables strongly correlated (or anti-correlated), and leaves
        #   the others uncorrelated.
        #   - Same cons, except that there is a determinant bound (which
        #     felt pretty loose).
        lows = [dtype(volume_bounds[dim][db][0]) for db in det_bounds]
        highs = [dtype(volume_bounds[dim][db][1]) for db in det_bounds]
        concentration = np.array(concentrations, dtype=dtype)
        det_bounds = np.array(det_bounds, dtype=dtype)
        # Due to possible numerical inaccuracies while lower bounding the
        # determinant, the maximum of the importance weights may exceed the
        # theoretical maximum (importance_maxima). We add a tolerance to guard
        # against this. An alternative would have been to add a threshold while
        # filtering in _det_ok_mask, but that would affect the mean as well.
        high_tolerance = 1e-6

        testee_lkj = tfd.LKJ(dimension=dim,
                             concentration=concentration,
                             input_output_cholesky=input_output_cholesky,
                             validate_args=True)
        x = testee_lkj.sample(num_samples, seed=seed)
        importance_weights = (
            tf.exp(-testee_lkj.log_prob(x)) *
            _det_ok_mask(x, det_bounds, input_output_cholesky))
        importance_maxima = (1. / det_bounds)**(concentration - 1) * tf.exp(
            testee_lkj._log_normalization())
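        # Annotation (added): lkj_prob(x) = det(x)**(concentration - 1) /
        # exp(_log_normalization()), so on the retained set
        # {det(x) >= det_bound}, and for concentration >= 1, each weight
        # 1 / lkj_prob(x) is at most
        # (1 / det_bound)**(concentration - 1) * exp(_log_normalization()),
        # which is exactly `importance_maxima`.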
        check1 = st.assert_true_mean_in_interval_by_dkwm(
            samples=importance_weights,
            low=0.,
            high=importance_maxima + high_tolerance,
            expected_low=lows,
            expected_high=highs,
            false_fail_rate=false_fail_rate)
        check2 = assert_util.assert_less(
            st.min_discrepancy_of_true_means_detectable_by_dkwm(
                num_samples,
                low=0.,
                high=importance_maxima + high_tolerance,
                false_fail_rate=false_fail_rate,
                false_pass_rate=false_fail_rate), dtype(target_discrepancy))
        self.evaluate([check1, check2])
 def testZeroDimension(self, dtype):
     testee_lkj = tfd.LKJ(dimension=0,
                          concentration=dtype([1., 4.]),
                          validate_args=True)
      results = testee_lkj.sample(sample_shape=[4, 3],
                                  seed=test_util.test_seed())
     self.assertEqual(results.shape, [4, 3, 2, 0, 0])