Ejemplo n.º 1
0
    def test_conditional_probability_alive_returns_1_if_no_repeat_purchases(
            self, cdnow_customers):
        """A fitted model must report probability 1.0 of being alive for the
        input ``(0, 1, 1)``, i.e. (per the test name) no repeat purchases."""
        fitter = estimation.BetaGeoFitter()
        frequency = cdnow_customers['frequency']
        recency = cdnow_customers['recency']
        observation_period = cdnow_customers['T']
        fitter.fit(frequency, recency, observation_period)

        p_alive = fitter.conditional_probability_alive(0, 1, 1)
        assert p_alive == 1.0
Ejemplo n.º 2
0
    def test_customer_lifetime_value_with_bgf(self):
        """Check that ``GammaGammaFitter.customer_lifetime_value`` and
        ``utils.customer_lifetime_value`` produce identical values when the
        latter is fed the Gamma-Gamma conditional expected average profit."""
        # NOTE(review): `cdnow_customers_with_monetary_value` is not a
        # parameter of this test, so it has to resolve from an enclosing
        # scope -- confirm it is a module-level dataset and not a fixture.
        customers = cdnow_customers_with_monetary_value

        gamma_gamma = estimation.GammaGammaFitter()
        gamma_gamma.params_ = OrderedDict({'p': 6.25, 'q': 3.74, 'v': 15.44})

        frequency = customers['frequency']
        recency = customers['recency']
        observation_period = customers['T']
        monetary_value = customers['monetary_value']

        beta_geo = estimation.BetaGeoFitter()
        beta_geo.fit(frequency, recency, observation_period,
                     iterative_fitting=3)

        # CLV computed through the Gamma-Gamma fitter's own method.
        clv_from_fitter = gamma_gamma.customer_lifetime_value(
            beta_geo, frequency, recency, observation_period, monetary_value)

        # CLV computed through the utils function, using the expected
        # average profit as the monetary input.
        expected_profit = gamma_gamma.conditional_expected_average_profit(
            frequency, monetary_value)
        clv_from_utils = utils.customer_lifetime_value(
            beta_geo, frequency, recency, observation_period, expected_profit)

        npt.assert_equal(clv_from_fitter.values, clv_from_utils.values)
Ejemplo n.º 3
0
 def test_probability_of_n_purchases_up_to_time_same_as_R_BTYD(self):
     """Compare purchase-count probabilities with reference values.

     See https://cran.r-project.org/web/packages/BTYD/BTYD.pdf
     """
     from collections import OrderedDict
     model_params = {
         'r': 0.243,
         'alpha': 4.414,
         'a': 0.793,
         'b': 2.426
     }
     fitter = estimation.BetaGeoFitter()
     fitter.params_ = OrderedDict(model_params)

     tolerance = 1e-4
     # Each case is (t, n, reference): the probability that a customer makes
     # exactly n repeat transactions in the time interval (0, t].
     scalar_cases = (
         (2, 10, 1.07869e-07),
         (39, 0, 0.5737864),
     )
     for horizon, n_purchases, reference in scalar_cases:
         computed = fitter.probability_of_n_purchases_up_to_time(
             horizon, n_purchases)
         assert abs(reference - computed) < tolerance

     # PMF over n = 11..20 at t = 30
     reference_pmf = np.array([
         0.0019995214, 0.0015170236, 0.0011633150, 0.0009003148,
         0.0007023638, 0.0005517902, 0.0004361913, 0.0003467171,
         0.0002769613, 0.0002222260
     ])
     purchase_counts = range(11, 21)
     computed_pmf = np.array([
         fitter.probability_of_n_purchases_up_to_time(30, count)
         for count in purchase_counts
     ])
     npt.assert_array_almost_equal(reference_pmf, computed_pmf, decimal=5)
Ejemplo n.º 4
0
    def test_save_load_bgnbd_no_data(self, cdnow_customers):
        """Test saving and loading model for BG/NBD without the fitted data.

        The reloaded model must carry the same penalizer, scale, parameters
        and negative log-likelihood, and must predict identically; its
        ``data`` attribute is asserted to be a list.
        """
        bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
        bgf.save_model(PATH_SAVE_BGNBD_MODEL, save_data=False)

        # Run the checks under try/finally so a failing assertion does not
        # leave the saved model file on disk.
        try:
            bgf_new = estimation.BetaGeoFitter()
            bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)
            assert bgf_new.__dict__['penalizer_coef'] == bgf.__dict__['penalizer_coef']
            assert bgf_new.__dict__['_scale'] == bgf.__dict__['_scale']
            assert bgf_new.__dict__['params_'] == bgf.__dict__['params_']
            assert bgf_new.__dict__['_negative_log_likelihood_'] == bgf.__dict__['_negative_log_likelihood_']
            assert bgf_new.__dict__['predict'](1, 1, 2, 5) == bgf.__dict__['predict'](1, 1, 2, 5)
            assert bgf_new.expected_number_of_purchases_up_to_time(1) == bgf.expected_number_of_purchases_up_to_time(1)

            assert isinstance(bgf_new.__dict__['data'], list)
        finally:
            # remove saved model
            os.remove(PATH_SAVE_BGNBD_MODEL)
Ejemplo n.º 5
0
    def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
        """The alive probability must stay inside [0, 1] over a coarse grid of
        inputs in which the third argument never drops below the second."""
        fitter = estimation.BetaGeoFitter()
        fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

        upper, step = 100, 10
        for first in range(0, upper, step):
            for second in range(0, upper, step):
                # The third argument starts at the second one.
                for third in range(second, upper, step):
                    p_alive = fitter.conditional_probability_alive(first, second, third)
                    assert 0 <= p_alive <= 1.0
Ejemplo n.º 6
0
    def test_expectation_returns_same_value_Hardie_excel_sheet(self, cdnow_customers):
        """Expected purchase counts at several time horizons must match the
        reference values (per the test name, Hardie's Excel sheet) to three
        decimals."""
        fitter = estimation.BetaGeoFitter()
        fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'], tol=1e-6)

        # (time horizon, reference expected number of purchases)
        reference_table = (
            (0.1429, 0.0078),
            (1.0, 0.0532),
            (3.00, 0.1506),
            (31.8571, 1.0405),
            (32.00, 1.0437),
            (78.00, 1.8576),
        )
        horizons = np.array([row[0] for row in reference_table])
        reference = np.array([row[1] for row in reference_table])

        computed = fitter.expected_number_of_purchases_up_to_time(horizons)
        npt.assert_array_almost_equal(computed, reference, decimal=3)
Ejemplo n.º 7
0
    def test_save_load_bgnbd(self, cdnow_customers):
        """Test saving and loading model for BG/NBD.

        The reloaded model must carry the same penalizer, scale, parameters,
        negative log-likelihood and data, and must predict identically.
        """
        bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
        bgf.save_model(PATH_SAVE_BGNBD_MODEL)

        # Run the checks under try/finally so a failing assertion does not
        # leave the saved model file on disk.
        try:
            bgf_new = estimation.BetaGeoFitter()
            bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)
            assert bgf_new.__dict__['penalizer_coef'] == bgf.__dict__['penalizer_coef']
            assert bgf_new.__dict__['_scale'] == bgf.__dict__['_scale']
            assert bgf_new.__dict__['params_'] == bgf.__dict__['params_']
            assert bgf_new.__dict__['_negative_log_likelihood_'] == bgf.__dict__['_negative_log_likelihood_']
            # Element-wise comparison reduced over both axes.
            assert (bgf_new.__dict__['data'] == bgf.__dict__['data']).all().all()
            assert bgf_new.__dict__['predict'](1, 1, 2, 5) == bgf.__dict__['predict'](1, 1, 2, 5)
            assert bgf_new.expected_number_of_purchases_up_to_time(1) == bgf.expected_number_of_purchases_up_to_time(1)
        finally:
            # remove saved model
            os.remove(PATH_SAVE_BGNBD_MODEL)
Ejemplo n.º 8
0
    def test_save_load_bgnbd_no_data_replace_with_empty_str(self, cdnow_customers):
        """Test saving and loading model for BG/NBD without data with replaced value empty str.

        The reloaded model must carry the same penalizer, scale, parameters
        and negative log-likelihood, must predict identically, and must have
        an empty string as its ``data`` attribute.
        """
        bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
        bgf.save_model(PATH_SAVE_BGNBD_MODEL, save_data=False, values_to_save=[''])

        # Run the checks under try/finally so a failing assertion does not
        # leave the saved model file on disk.
        try:
            bgf_new = estimation.BetaGeoFitter()
            bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)
            assert bgf_new.__dict__['penalizer_coef'] == bgf.__dict__['penalizer_coef']
            assert bgf_new.__dict__['_scale'] == bgf.__dict__['_scale']
            assert bgf_new.__dict__['params_'] == bgf.__dict__['params_']
            assert bgf_new.__dict__['_negative_log_likelihood_'] == bgf.__dict__['_negative_log_likelihood_']
            assert bgf_new.__dict__['predict'](1, 1, 2, 5) == bgf.__dict__['predict'](1, 1, 2, 5)
            assert bgf_new.expected_number_of_purchases_up_to_time(1) == bgf.expected_number_of_purchases_up_to_time(1)

            # Compare by value: identity (`is`) against a string literal
            # depends on interning and is not a reliable equality check.
            assert bgf_new.__dict__['data'] == ''
        finally:
            # remove saved model
            os.remove(PATH_SAVE_BGNBD_MODEL)
Ejemplo n.º 9
0
 def test_params_out_is_close_to_Hardie_paper(self, cdnow_customers):
     """Fitted ``r``, ``alpha``, ``a`` and ``b`` must match the reference
     values (per the test name, from Hardie's paper) to three decimals."""
     fitter = estimation.BetaGeoFitter()
     fitter.fit(cdnow_customers['x'], cdnow_customers['t_x'],
                cdnow_customers['T'])

     reference = np.array([0.243, 4.414, 0.793, 2.426])
     fitted = np.array(fitter._unload_params('r', 'alpha', 'a', 'b'))
     npt.assert_array_almost_equal(reference, fitted, decimal=3)
Ejemplo n.º 10
0
    def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
        """Every fitted parameter must get strictly smaller as the penalizer
        coefficient goes from the default to 0.1 and then to 10."""

        def fitted_params(**fitter_kwargs):
            # Fit a fresh model on the same data and return its parameter
            # values as an array.
            fitter = estimation.BetaGeoFitter(**fitter_kwargs)
            fitter.fit(cdnow_customers['x'], cdnow_customers['t_x'],
                       cdnow_customers['T'])
            return np.array(list(fitter.params_.values()))

        default_params = fitted_params()

        light_penalty_params = fitted_params(penalizer_coef=0.1)
        assert np.all(light_penalty_params < default_params)

        heavy_penalty_params = fitted_params(penalizer_coef=10)
        assert np.all(heavy_penalty_params < light_penalty_params)
Ejemplo n.º 11
0
    def test_fit_with_index(self, cdnow_customers):
        """``fit`` must use a caller-supplied ``index`` for the stored data
        and must not produce that same index when ``index=None``."""
        # A descending index, so it differs from a default ascending one.
        index = range(len(cdnow_customers), 0, -1)

        bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
            index=index
        )
        assert (bgf.data.index == index).all()

        bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
            index=None
        )
        assert not (bgf.data.index == index).all()
Ejemplo n.º 12
0
    def test_conditional_probability_alive_matrix(self, cdnow_customers):
        """Every cell of the alive-probability matrix must equal the direct
        ``conditional_probability_alive`` call for that cell."""
        fitter = estimation.BetaGeoFitter()
        fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

        matrix = fitter.conditional_probability_alive_matrix()
        # Largest T in the fitted data, truncated to an integer.
        longest_period = int(fitter.data['T'].max())
        assert matrix[0][0] == 1

        row_count = matrix.shape[0]
        column_count = matrix.shape[1]
        # The row index is passed as the second argument and the column
        # index as the first.
        for row in range(row_count):
            for column in range(column_count):
                direct = fitter.conditional_probability_alive(column, row, longest_period)
                assert matrix[row][column] == direct
Ejemplo n.º 13
0
 def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(self, cdnow_customers):
     """The conditional expected number of purchases for one example input
     must be within 0.001 of the reference value 1.226 (per the test name,
     from Hardie's Excel sheet)."""
     fitter = estimation.BetaGeoFitter()
     fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

     # Positional inputs in call order: t, then x, t_x and T.
     horizon = 39
     purchases, last_purchase_time, observed_period = 2, 30.43, 38.86
     reference = 1.226

     computed = fitter.conditional_expected_number_of_purchases_up_to_time(
         horizon, purchases, last_purchase_time, observed_period)
     assert abs(reference - computed) < 0.001
Ejemplo n.º 14
0
    def test_scaling_inputs_gives_same_or_similar_results(self):
        """Multiplying recency and T by a constant must leave the alive
        probabilities (almost) unchanged and give an internal ``_scale``
        below 1."""
        # NOTE(review): `cdnow_customers` is not a parameter of this test, so
        # it has to resolve from an enclosing scope -- confirm it is a
        # module-level dataset and not a fixture.
        customers = cdnow_customers
        factor = 10
        tolerance = 1e-4

        baseline = estimation.BetaGeoFitter()
        baseline.fit(customers['frequency'], customers['recency'],
                     customers['T'])

        scaled = estimation.BetaGeoFitter()
        scaled.fit(customers['frequency'],
                   factor * customers['recency'],
                   factor * customers['T'],
                   iterative_fitting=2)
        assert scaled._scale < 1.

        # (recency, T) pairs evaluated on both models with frequency 1.
        for recency, period in ((1, 2), (2, 10)):
            scaled_p_alive = scaled.conditional_probability_alive(
                1, factor * recency, factor * period)
            baseline_p_alive = baseline.conditional_probability_alive(
                1, recency, period)
            assert abs(scaled_p_alive - baseline_p_alive) < tolerance
Ejemplo n.º 15
0
    def test_using_weights_col_gives_correct_results(self, cdnow_customers):
        """Fitting on rows collapsed by (frequency, recency, T) with a count
        weight must give the same parameters, to four decimals, as fitting
        on the uncollapsed rows."""
        group_columns = ['frequency', 'recency', 'T']
        param_names = ('r', 'alpha', 'a', 'b')

        # Give every row weight 1.0, then sum the weights of identical rows.
        weighted = cdnow_customers.copy()
        weighted['weights'] = 1.0
        weighted = weighted.groupby(group_columns)['weights'].sum().reset_index()
        # The check is only meaningful if some rows were actually merged.
        assert (weighted['weights'] > 1).any()

        weighted_fitter = estimation.BetaGeoFitter(penalizer_coef=0.0)
        weighted_fitter.fit(weighted['frequency'],
                            weighted['recency'],
                            weighted['T'],
                            weights=weighted['weights'])

        plain_fitter = estimation.BetaGeoFitter(penalizer_coef=0.0)
        plain_fitter.fit(cdnow_customers['frequency'],
                         cdnow_customers['recency'], cdnow_customers['T'])

        plain_params = np.array(plain_fitter._unload_params(*param_names))
        weighted_params = np.array(weighted_fitter._unload_params(*param_names))
        npt.assert_almost_equal(plain_params, weighted_params, decimal=4)
Ejemplo n.º 16
0
    def test_no_runtime_warnings_high_frequency(self, cdnow_customers):
        """Fitting and predicting for a very high frequency must not trigger
        any NumPy floating-point error, and the alive probability for
        ``frequency=1000, recency=10, T=100`` must be exactly 0."""
        # `np.errstate` restores the previous floating-point error handling
        # even when the body raises, so a failure here cannot leak the
        # `all='raise'` setting to code that runs afterwards.
        with np.errstate(all='raise'):
            bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
            bgf.fit(cdnow_customers['frequency'],
                    cdnow_customers['recency'],
                    cdnow_customers['T'],
                    index=None)

            p_alive = bgf.conditional_probability_alive(frequency=1000,
                                                        recency=10,
                                                        T=100)
        assert p_alive == 0.