def test_results_with_different_exposures(self):
    # Variants with unequal exposure: (name, count, exposure, absolute_exposure).
    control = CountVariant("A", 50, 1.3, 260)  # 38
    test_b = CountVariant("B", 30, 1.8, 360)  # 16
    test_c = CountVariant("C", 20, 0.7, 140)  # 29

    # Control goes first; the rest are test variants.
    probs = ClickhouseTrendExperimentResult.calculate_results(control, [test_b, test_c])
    self.assertAlmostEqual(probs[0], 0.86, places=1)
    self.assertAlmostEqual(probs[1], 0, places=1)
    self.assertAlmostEqual(probs[2], 0.13, places=1)

    # Each win probability should match the direct per-variant computation.
    direct_b = calculate_probability_of_winning_for_target_count_data(test_b, [control, test_c])
    self.assertAlmostEqual(probs[1], direct_b, places=1)

    direct_a = calculate_probability_of_winning_for_target_count_data(control, [test_b, test_c])
    self.assertAlmostEqual(probs[0], direct_a, places=1)

    self.assertAlmostEqual(calculate_p_value(control, [test_b, test_c]), 0, places=3)

    significant, p_value = ClickhouseTrendExperimentResult.are_results_significant(
        control, [test_b, test_c], probs
    )
    self.assertAlmostEqual(p_value, 1, places=3)
    # Not significant: the best win probability is below the 0.9 threshold.
    self.assertEqual(significant, ExperimentSignificanceCode.LOW_WIN_PROBABILITY)
def test_calculate_results_small_numbers(self):
    # Tiny counts: the posterior should stay close to a coin flip.
    control = CountVariant("A", 2, 1, 200)
    test = CountVariant("B", 1, 1, 200)

    # Control goes first; the rest are test variants.
    probs = ClickhouseTrendExperimentResult.calculate_results(control, [test])
    self.assertAlmostEqual(probs[1], 0.31, places=1)

    # The win probability should match the direct per-variant computation.
    direct = calculate_probability_of_winning_for_target_count_data(test, [control])
    self.assertAlmostEqual(probs[1], direct, places=1)

    # With numbers this small, the difference is nowhere near significant.
    self.assertAlmostEqual(calculate_p_value(control, [test]), 1, places=2)
def test_calculate_results(self):
    control = CountVariant("A", 20, 1, 200)
    test = CountVariant("B", 30, 1, 200)

    # Control goes first; the rest are test variants.
    probs = ClickhouseTrendExperimentResult.calculate_results(control, [test])
    self.assertAlmostEqual(probs[1], 0.92, places=1)

    # The win probability should match the direct per-variant computation.
    direct = calculate_probability_of_winning_for_target_count_data(test, [control])
    self.assertAlmostEqual(probs[1], direct, places=1)

    # p value testing matches https://www.evanmiller.org/ab-testing/poisson-means.html
    self.assertAlmostEqual(calculate_p_value(control, [test]), 0.20, places=2)
def test_calculate_results_with_three_variants(self):
    control = CountVariant("A", 20, 1, 200)  # control
    test_b = CountVariant("B", 26, 1, 200)
    test_c = CountVariant("C", 19, 1, 200)

    # Win probabilities over all three variants should sum to ~1.
    probs = ClickhouseTrendExperimentResult.calculate_results(control, [test_b, test_c])
    self.assertAlmostEqual(probs[0], 0.16, places=1)
    self.assertAlmostEqual(probs[1], 0.72, places=1)
    self.assertAlmostEqual(probs[2], 0.12, places=1)

    # The control's win probability should match the direct computation.
    direct = calculate_probability_of_winning_for_target_count_data(control, [test_b, test_c])
    self.assertAlmostEqual(probs[0], direct, places=1)

    self.assertAlmostEqual(calculate_p_value(control, [test_b, test_c]), 0.46, places=2)
def test_calculate_significance_when_target_variants_underperform(self):
    control = CountVariant("A", 250, 1, 200)  # control
    test_b = CountVariant("B", 180, 1, 200)
    test_c = CountVariant("C", 50, 1, 200)

    # in this case, should choose B as best test variant
    self.assertAlmostEqual(calculate_p_value(control, [test_b, test_c]), 0.001, places=3)

    # manually assign probabilities to control test case
    significant, p_value = ClickhouseTrendExperimentResult.are_results_significant(
        control, [test_b, test_c], [0.5, 0.4, 0.1]
    )
    self.assertAlmostEqual(p_value, 1, places=3)
    self.assertEqual(significant, ExperimentSignificanceCode.LOW_WIN_PROBABILITY)

    # new B variant is worse, such that control probability ought to be high enough
    test_b = CountVariant("B", 100, 1, 200)
    significant, p_value = ClickhouseTrendExperimentResult.are_results_significant(
        control, [test_b, test_c], [0.95, 0.03, 0.02]
    )
    self.assertAlmostEqual(p_value, 0, places=3)
    self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT)