Ejemplo n.º 1
0
    def setup(self):
        """Build the two-group fixture frame and the ``StudentsTTest`` under test.

        Stores the frame on ``self.data`` and the test object on ``self.test``.
        The ``avg`` and ``var`` columns are part of the frame but are not
        passed to the ``StudentsTTest`` constructor here.
        """
        columns = ('group', 'count', 'sum', 'sum_of_squares', 'avg', 'var')
        rows = (
            ("1", 5000, 10021.0, 25142.0, 2.004210, 1.0116668),
            ("2", 5000, 9892.0, 24510.0, 1.978424, 0.9881132),
        )
        # One record (dict) per group, keyed by column name.
        self.data = pd.DataFrame([dict(zip(columns, row)) for row in rows])

        column_roles = {
            'numerator_column': 'sum',
            'numerator_sum_squares_column': 'sum_of_squares',
            'denominator_column': 'count',
            'categorical_group_columns': 'group',
        }
        self.test = spotify_confidence.StudentsTTest(
            self.data, interval_size=0.99, **column_roles)
Ejemplo n.º 2
0
    def setup(self):
        """Build a country x variation fixture frame and its ``StudentsTTest``.

        Stores the frame on ``self.data`` and the test object on ``self.test``.
        """
        # Seeds NumPy's global RNG; nothing in this method draws random
        # numbers, so this is presumably for the tests that follow - confirm.
        np.random.seed(123)

        variations = ['test', 'control', 'test2']
        self.data = pd.DataFrame({
            # The three variations appear once per country.
            'variation_name': variations * 2,
            'nr_of_items': [1969, 312, 2955, 195, 24, 330],
            'nr_of_items_sumsq': [5767, 984, 8771, 553, 80, 1010],
            'users': [1009, 104, 1502, 100, 10, 150],
            'country': ['us'] * 3 + ['gb'] * 3,
        })

        self.test = spotify_confidence.StudentsTTest(
            self.data,
            categorical_group_columns=['country', 'variation_name'],
            denominator_column='users',
            numerator_column='nr_of_items',
            numerator_sum_squares_column='nr_of_items_sumsq',
            interval_size=0.95,
        )
Ejemplo n.º 3
0
    def setup(self):
        """Build a variation x ``days_since_reg`` fixture and its ``StudentsTTest``.

        The frame has 15 rows: three variations for each of the day values
        1 through 5. It is stored on ``self.data``; the test object, which uses
        ``days_since_reg`` as its ordinal group column, is stored on
        ``self.test``.
        """
        variations = ['test', 'control', 'test2']
        day_values = range(1, 6)

        items = [
            500, 8, 100, 510, 8, 100, 520, 9, 104, 530, 7, 100, 530, 8, 103,
        ]
        # NOTE(review): the final entry (103) equals the final nr_of_items
        # entry and breaks the pattern of the other 'test2' values - confirm
        # it is intentional.
        items_sumsq = [
            2500, 12, 150, 2510, 13, 140, 2520, 14, 154, 2530, 15, 160,
            2530, 16, 103,
        ]
        user_counts = [
            1010, 22, 150, 1000, 20, 153, 1030, 23, 154, 1000, 20, 150,
            1040, 21, 155,
        ]

        self.data = pd.DataFrame({
            'variation_name': variations * len(day_values),
            'nr_of_items': items,
            'nr_of_items_sumsq': items_sumsq,
            'users': user_counts,
            # Each day value is repeated once per variation: 1, 1, 1, 2, 2, ...
            'days_since_reg': [
                day for day in day_values for _ in variations
            ],
        })

        self.test = spotify_confidence.StudentsTTest(
            self.data,
            ordinal_group_column='days_since_reg',
            categorical_group_columns='variation_name',
            denominator_column='users',
            numerator_column='nr_of_items',
            numerator_sum_squares_column='nr_of_items_sumsq',
            interval_size=0.95,
        )
Ejemplo n.º 4
0
    def test_p_value_is_symmetric_and_correct(self):
        """Check the p-value is order-independent and close to 0.03334.

        Compares 'Control' vs 'Test' against 'Test' vs 'Control': the first
        p-value of each difference summary must be exactly equal, and must be
        within a relative tolerance of 1% of 0.03334.
        """
        summary = pd.DataFrame({
            'group_name': ['Control', 'Test'],
            'users': [48351, 50571],
            'nr_of_items': [1.438602e+06, 1.521974e+06],
            'nr_of_items_sumsq': [7.330581e+07, 7.862121e+07],
        })
        ttest = spotify_confidence.StudentsTTest(
            data_frame=summary,
            categorical_group_columns='group_name',
            numerator_column='nr_of_items',
            numerator_sum_squares_column='nr_of_items_sumsq',
            denominator_column='users',
        )

        control_vs_test = ttest.difference(level_1='Control', level_2='Test')
        test_vs_control = ttest.difference(level_1='Test', level_2='Control')

        p_forward = control_vs_test['p-value'].iloc[0]
        p_reverse = test_vs_control['p-value'].iloc[0]

        # Exact equality is deliberate: swapping the levels must not change
        # the p-value at all.
        assert p_forward == p_reverse
        # Third positional argument of np.isclose is rtol; spelled out here.
        assert np.isclose(p_forward, .03334, rtol=0.01)
Ejemplo n.º 5
0
    def setup(self):
        """Build a success/total fixture frame and its ``StudentsTTest``.

        Stores the frame on ``self.data`` and the test object on ``self.test``.
        No ``interval_size`` is passed, so the constructor's default applies.
        """
        # Seeds NumPy's global RNG; nothing in this method draws random
        # numbers, so this is presumably for the tests that follow - confirm.
        np.random.seed(123)

        variations = ['test', 'control', 'test2']
        self.data = pd.DataFrame({
            # The three variations appear once per country.
            'variation_name': variations * 2,
            'success': [500, 42, 1005, 50, 4, 100],
            'total': [1009, 104, 1502, 100, 10, 150],
            'country': ['us'] * 3 + ['gb'] * 3,
        })

        # 'success' is passed as both the numerator and the sum-of-squares
        # column - presumably because the outcome is 0/1 so the two sums
        # coincide; confirm.
        successes = 'success'
        self.test = spotify_confidence.StudentsTTest(
            self.data,
            categorical_group_columns=['country', 'variation_name'],
            denominator_column='total',
            numerator_column=successes,
            numerator_sum_squares_column=successes,
        )
Ejemplo n.º 6
0
    def setup(self):
        """Build a variation x country x ``days_since_reg`` fixture and its test.

        The frame has 30 rows: 15 for country 'us' followed by 15 for 'gb'.
        Within each country the rows cycle through three variations for each
        of the day values 1 through 5. The frame is stored on ``self.data``
        and the ``StudentsTTest`` built from it on ``self.test``. No
        ``interval_size`` is passed, so the constructor's default applies.
        """
        variations = ['test', 'control', 'test2']
        day_values = range(1, 6)
        countries = ['us', 'gb']
        rows_per_country = len(variations) * len(day_values)

        # Item counts are identical for both countries.
        items_per_country = [
            500, 8, 100, 510, 8, 100, 520, 9, 104, 530, 7, 100, 530, 8, 103,
        ]

        sumsq_us = [
            1010, 32, 250, 1000, 30, 253, 1030, 33, 254, 1000, 30, 250,
            1040, 31, 255,
        ]
        # NOTE(review): the 'gb' sum-of-squares values are identical to the
        # 'gb' user counts below - confirm this is intentional.
        sumsq_gb = [
            1010, 22, 150, 1000, 20, 153, 1030, 23, 154, 1000, 20, 150,
            1040, 21, 155,
        ]

        users_us = [
            2010, 42, 250, 2000, 40, 253, 2030, 43, 254, 2000, 40, 250,
            2040, 41, 255,
        ]
        users_gb = [
            1010, 22, 150, 1000, 20, 153, 1030, 23, 154, 1000, 20, 150,
            1040, 21, 155,
        ]

        # Each day value is repeated once per variation: 1, 1, 1, 2, 2, 2, ...
        days_per_country = [day for day in day_values for _ in variations]

        self.data = pd.DataFrame({
            'variation_name': variations * (len(day_values) * len(countries)),
            'nr_of_items': items_per_country * len(countries),
            'nr_of_items_sumsq': sumsq_us + sumsq_gb,
            'users': users_us + users_gb,
            'days_since_reg': days_per_country * len(countries),
            'country': [
                country
                for country in countries
                for _ in range(rows_per_country)
            ],
        })

        self.test = spotify_confidence.StudentsTTest(
            self.data,
            ordinal_group_column='days_since_reg',
            categorical_group_columns=['variation_name', 'country'],
            denominator_column='users',
            numerator_column='nr_of_items',
            numerator_sum_squares_column='nr_of_items_sumsq',
        )