def test_dkwm_mean_two_sample_assertion(self):
    """Two standard-uniform samples should pass the two-sample mean check."""
    sample_count = 4000
    random_state = np.random.RandomState(seed=0)

    # Sanity-check the sample budget first: 4000 draws per sample is chosen
    # to be enough to find discrepancies of size 0.2 or more with assurance
    # 1e-6, which the following computation confirms.
    detectable = self.evaluate(
        st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample(
            sample_count, 0., 1., sample_count, 0., 1.,
            false_fail_rate=1e-6, false_pass_rate=1e-6))
    self.assertLess(detectable, 0.2)

    # The standard uniform distribution has the same mean as itself, so the
    # assertion is expected to agree when handed two draws from it.
    first_draw = random_state.uniform(size=sample_count).astype(np.float32)
    second_draw = random_state.uniform(size=sample_count).astype(np.float32)
    equal_means_check = st.assert_true_mean_equal_by_dkwm_two_sample(
        first_draw, 0., 1., second_draw, 0., 1., false_fail_rate=1e-6)
    self.evaluate(equal_means_check)
def test_dkwm_mean_two_sample_assertion(self):
    """Exercise the two-sample DKWM mean-equality assertion on uniform data.

    First confirms that the chosen sample size can detect a discrepancy
    below 0.2 at the requested error rates, then evaluates the assertion on
    two float32 samples drawn from the standard uniform distribution.
    """
    low, high = 0., 1.
    error_rate = 1e-6
    n = 4000
    generator = np.random.RandomState(seed=0)

    # n = 4000 is chosen to be enough to find discrepancies of size 0.2 or
    # more with assurance 1e-6; verify that claim before relying on it.
    min_discrepancy = (
        st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample(
            n, low, high, n, low, high,
            false_fail_rate=error_rate, false_pass_rate=error_rate))
    self.assertLess(self.evaluate(min_discrepancy), 0.2)

    # Draw order matters for reproducibility: the first sample is generated
    # before the second from the same seeded generator.
    draws = [
        generator.uniform(size=n).astype(np.float32) for _ in range(2)
    ]

    # The assertion should agree that the standard uniform distribution has
    # the same mean as itself.
    self.evaluate(
        st.assert_true_mean_equal_by_dkwm_two_sample(
            draws[0], low, high, draws[1], low, high,
            false_fail_rate=error_rate))
def test_dkwm_design_mean_two_sample_soundness(self):
    """Check that designed two-sample sizes achieve their target thresholds.

    For every (false_fail_rate, false_pass_rate) pair on a grid of rates,
    asks `st.min_num_samples_for_dkwm_mean_two_sample_test` for sample sizes
    for each discrepancy threshold, feeds those sizes back into
    `st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample`, and
    asserts that the resulting detectable discrepancy does not exceed the
    threshold it was designed for.
    """
    thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10]
    rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.]

    # Build every pairing of the rates as two parallel, flattened vectors.
    false_fail_rates, false_pass_rates = np.meshgrid(rates, rates)
    false_fail_rates = false_fail_rates.flatten().astype(np.float32)
    false_pass_rates = false_pass_rates.flatten().astype(np.float32)

    detectable_discrepancies = []
    for false_pass_rate, false_fail_rate in zip(
            false_pass_rates, false_fail_rates):
        sufficient_n1, sufficient_n2 = (
            st.min_num_samples_for_dkwm_mean_two_sample_test(
                thresholds, low1=0., high1=1., low2=0., high2=1.,
                false_fail_rate=false_fail_rate,
                false_pass_rate=false_pass_rate))
        detectable_discrepancies.append(
            st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample(
                n1=sufficient_n1, low1=0., high1=1.,
                n2=sufficient_n2, low2=0., high2=1.,
                false_fail_rate=false_fail_rate,
                false_pass_rate=false_pass_rate))

    # Evaluate all the collected results in a single call.
    detectable_discrepancies_ = self.evaluate(detectable_discrepancies)
    for discrepancies, false_pass_rate, false_fail_rate in zip(
            detectable_discrepancies_, false_pass_rates, false_fail_rates):
        below_threshold = discrepancies <= thresholds
        # Use the builtin `bool` dtype: the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        self.assertAllEqual(
            np.ones_like(below_threshold, dtype=bool),
            below_threshold,
            msg='false_pass_rate({}), false_fail_rate({})'.format(
                false_pass_rate, false_fail_rate))
def test_dkwm_design_mean_two_sample_soundness(self):
    """Verify the two-sample sample-size design against its own thresholds.

    Sample sizes returned by
    `st.min_num_samples_for_dkwm_mean_two_sample_test` for a list of
    thresholds are passed to
    `st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample`; the
    test asserts, for each pairing of false-fail and false-pass rates, that
    every resulting discrepancy is at most the corresponding threshold.
    """
    thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10]
    rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.]

    # meshgrid + flatten enumerates all rate pairings as aligned 1-D arrays.
    false_fail_rates, false_pass_rates = np.meshgrid(rates, rates)
    false_fail_rates = false_fail_rates.flatten().astype(np.float32)
    false_pass_rates = false_pass_rates.flatten().astype(np.float32)

    detectable_discrepancies = []
    for false_pass_rate, false_fail_rate in zip(
            false_pass_rates, false_fail_rates):
        sufficient_n1, sufficient_n2 = (
            st.min_num_samples_for_dkwm_mean_two_sample_test(
                thresholds,
                low1=0.,
                high1=1.,
                low2=0.,
                high2=1.,
                false_fail_rate=false_fail_rate,
                false_pass_rate=false_pass_rate))
        detectable_discrepancies.append(
            st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample(
                n1=sufficient_n1,
                low1=0.,
                high1=1.,
                n2=sufficient_n2,
                low2=0.,
                high2=1.,
                false_fail_rate=false_fail_rate,
                false_pass_rate=false_pass_rate))

    detectable_discrepancies_ = self.evaluate(detectable_discrepancies)
    for discrepancies, false_pass_rate, false_fail_rate in zip(
            detectable_discrepancies_, false_pass_rates, false_fail_rates):
        below_threshold = discrepancies <= thresholds
        # `np.bool` no longer exists in NumPy >= 1.24 (deprecated since
        # 1.20); the builtin `bool` selects the same boolean dtype.
        self.assertAllEqual(
            np.ones_like(below_threshold, dtype=bool),
            below_threshold,
            msg='false_pass_rate({}), false_fail_rate({})'.format(
                false_pass_rate, false_fail_rate))