Example #1
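These snippets read like methods of a unittest.TestCase exercising count-based (trend) experiment statistics. The surrounding scaffolding is not shown; below is a minimal sketch of what they appear to assume. The import path is a guess, the class name is hypothetical, and CountVariant's field order (key, count, exposure, absolute_exposure) is inferred from the calls that follow:

    from unittest import TestCase

    # Assumed import location -- the excerpts do not show the real module path:
    from ee.clickhouse.queries.experiments.trend_experiment_result import (
        ClickhouseTrendExperimentResult,
        CountVariant,
        calculate_p_value,
        calculate_probability_of_winning_for_target_count_data,
    )
    from posthog.constants import ExperimentSignificanceCode  # assumed path

    class TestTrendExperimentCalculator(TestCase):  # hypothetical class name
        ...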
    def test_results_with_different_exposures(self):
        # CountVariant fields: key, count, exposure, absolute_exposure
        variant_a = CountVariant("A", 50, 1.3, 260)  # count / exposure ≈ 38.5
        variant_b = CountVariant("B", 30, 1.8, 360)  # count / exposure ≈ 16.7
        variant_c = CountVariant("C", 20, 0.7, 140)  # count / exposure ≈ 28.6

        probabilities = ClickhouseTrendExperimentResult.calculate_results(
            variant_a, [variant_b, variant_c])  # a is control
        self.assertAlmostEqual(probabilities[0], 0.86, places=1)
        self.assertAlmostEqual(probabilities[1], 0, places=1)
        self.assertAlmostEqual(probabilities[2], 0.13, places=1)

        computed_probability = calculate_probability_of_winning_for_target_count_data(
            variant_b, [variant_a, variant_c])
        self.assertAlmostEqual(probabilities[1],
                               computed_probability,
                               places=1)

        computed_probability = calculate_probability_of_winning_for_target_count_data(
            variant_a, [variant_b, variant_c])
        self.assertAlmostEqual(probabilities[0],
                               computed_probability,
                               places=1)

        p_value = calculate_p_value(variant_a, [variant_b, variant_c])
        self.assertAlmostEqual(p_value, 0, places=3)

        significant, p_value = ClickhouseTrendExperimentResult.are_results_significant(
            variant_a, [variant_b, variant_c], probabilities)
        self.assertAlmostEqual(p_value, 1, places=3)
        # not significant: the maximum win probability (0.86) is below the 0.9 threshold
        self.assertEqual(significant,
                         ExperimentSignificanceCode.LOW_WIN_PROBABILITY)
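The probabilities asserted in this example can be approximated independently: treat each variant's events as Poisson with rate count / exposure, so the rate posterior under a flat prior is Gamma(count + 1, exposure), and estimate each variant's win probability by Monte Carlo. This is a sketch under those assumptions rather than the library's confirmed method; simulate_win_probabilities and the local Variant tuple are hypothetical:

    from collections import namedtuple

    import numpy as np

    Variant = namedtuple("Variant", ["key", "count", "exposure"])  # minimal stand-in

    def simulate_win_probabilities(variants, samples=100_000, seed=0):
        rng = np.random.default_rng(seed)
        # Gamma(count + 1, rate=exposure) is the Poisson-rate posterior under a
        # flat prior; numpy's gamma takes scale = 1 / rate.
        draws = np.column_stack([
            rng.gamma(v.count + 1, 1.0 / v.exposure, size=samples) for v in variants
        ])
        winners = draws.argmax(axis=1)
        return [(winners == i).mean() for i in range(len(variants))]

    # The Example #1 inputs roughly reproduce the asserted probabilities:
    print(simulate_win_probabilities([
        Variant("A", 50, 1.3),
        Variant("B", 30, 1.8),
        Variant("C", 20, 0.7),
    ]))  # approximately [0.86, 0.00, 0.13]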
Example #2
    def test_calculate_results_small_numbers(self):
        variant_a = CountVariant("A", 2, 1, 200)
        variant_b = CountVariant("B", 1, 1, 200)

        probabilities = ClickhouseTrendExperimentResult.calculate_results(
            variant_a, [variant_b])  # a is control
        self.assertAlmostEqual(probabilities[1], 0.31, places=1)

        computed_probability = calculate_probability_of_winning_for_target_count_data(
            variant_b, [variant_a])
        self.assertAlmostEqual(probabilities[1],
                               computed_probability,
                               places=1)

        p_value = calculate_p_value(variant_a, [variant_b])
        self.assertAlmostEqual(p_value, 1, places=2)
Example #3
    def test_calculate_results(self):
        variant_a = CountVariant("A", 20, 1, 200)
        variant_b = CountVariant("B", 30, 1, 200)

        probabilities = ClickhouseTrendExperimentResult.calculate_results(
            variant_a, [variant_b])  # a is control
        self.assertAlmostEqual(probabilities[1], 0.92, places=1)

        computed_probability = calculate_probability_of_winning_for_target_count_data(
            variant_b, [variant_a])
        self.assertAlmostEqual(probabilities[1],
                               computed_probability,
                               places=1)

        # p value testing matches https://www.evanmiller.org/ab-testing/poisson-means.html
        p_value = calculate_p_value(variant_a, [variant_b])
        self.assertAlmostEqual(p_value, 0.20, places=2)
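The linked calculator uses the conditional test for two Poisson means: given the total event count, the number of events in one variant is Binomial under the null hypothesis of equal rates, with success probability proportional to exposure. A sketch under that assumption — poisson_means_p_value is a hypothetical helper, and the library's calculate_p_value may generalize this to several variants:

    from scipy.stats import binomtest

    def poisson_means_p_value(count_a, exposure_a, count_b, exposure_b):
        # Conditional on the total, the split between the two variants is
        # Binomial(total, p_null) when the underlying Poisson rates are equal.
        total = count_a + count_b
        p_null = exposure_a / (exposure_a + exposure_b)
        return binomtest(count_a, total, p_null).pvalue

    # Example #3 inputs: 20 vs 30 events over equal exposures
    print(round(poisson_means_p_value(20, 200, 30, 200), 2))  # ~0.20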
Example #4
    def test_calculate_results_with_three_variants(self):
        variant_a = CountVariant("A", 20, 1, 200)  # control
        variant_b = CountVariant("B", 26, 1, 200)
        variant_c = CountVariant("C", 19, 1, 200)

        probabilities = ClickhouseTrendExperimentResult.calculate_results(
            variant_a, [variant_b, variant_c])
        self.assertAlmostEqual(probabilities[0], 0.16, places=1)
        self.assertAlmostEqual(probabilities[1], 0.72, places=1)
        self.assertAlmostEqual(probabilities[2], 0.12, places=1)

        computed_probability = calculate_probability_of_winning_for_target_count_data(
            variant_a, [variant_b, variant_c])
        self.assertAlmostEqual(probabilities[0],
                               computed_probability,
                               places=1)

        p_value = calculate_p_value(variant_a, [variant_b, variant_c])
        self.assertAlmostEqual(p_value, 0.46, places=2)
Example #5
    def test_calculate_significance_when_target_variants_underperform(self):
        variant_a = CountVariant("A", 250, 1, 200)  # control
        variant_b = CountVariant("B", 180, 1, 200)
        variant_c = CountVariant("C", 50, 1, 200)

        # calculate_p_value should compare the control against B, the best-performing test variant
        p_value = calculate_p_value(variant_a, [variant_b, variant_c])
        self.assertAlmostEqual(p_value, 0.001, places=3)

        # probabilities are assigned manually here to control the test case
        significant, p_value = ClickhouseTrendExperimentResult.are_results_significant(
            variant_a, [variant_b, variant_c], [0.5, 0.4, 0.1])
        self.assertAlmostEqual(p_value, 1, places=3)
        self.assertEqual(significant,
                         ExperimentSignificanceCode.LOW_WIN_PROBABILITY)

        # with a worse B variant, the control's win probability becomes high enough for significance
        variant_b = CountVariant("B", 100, 1, 200)

        significant, p_value = ClickhouseTrendExperimentResult.are_results_significant(
            variant_a, [variant_b, variant_c], [0.95, 0.03, 0.02])
        self.assertAlmostEqual(p_value, 0, places=3)
        self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT)
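Across these examples, the significance assertions are consistent with a two-stage gate: first require a clear winner (maximum win probability of at least 0.9), then require a small p-value. A hedged reconstruction of that decision — the 0.05 threshold and the HIGH_P_VALUE branch are assumptions beyond what the tests above pin down:

    def significance_gate(probabilities, p_value, min_probability=0.9, alpha=0.05):
        # Sketch of the decision the tests imply; the real are_results_significant
        # may apply further checks (e.g. minimum exposure).
        if max(probabilities) < min_probability:
            # The tests assert a reported p_value of 1 in this branch.
            return ExperimentSignificanceCode.LOW_WIN_PROBABILITY, 1.0
        if p_value >= alpha:
            return ExperimentSignificanceCode.HIGH_P_VALUE, p_value  # assumed branch
        return ExperimentSignificanceCode.SIGNIFICANT, p_value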