def test_save_load_no_data_replace_with_empty_str(self, cdnow_customers):
        """Round-trip a fitted BG/NBD model saved without data and an empty-string placeholder.

        The model is saved with ``save_data=False`` and an empty string passed
        in ``values_to_save``. After loading into a fresh fitter, the fitted
        attributes and predictions must match the original model, and the
        ``data`` attribute must equal the empty string.
        """
        bgf = lt.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(cdnow_customers["frequency"], cdnow_customers["recency"],
                cdnow_customers["T"])
        bgf.save_model(PATH_SAVE_BGNBD_MODEL,
                       save_data=False,
                       values_to_save=[""])

        # The file exists from here on; clean it up in ``finally`` so that a
        # failing assertion does not leave it behind for later tests.
        try:
            bgf_new = lt.BetaGeoFitter()
            bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)
            assert bgf_new.__dict__["penalizer_coef"] == bgf.__dict__[
                "penalizer_coef"]
            assert bgf_new.__dict__["_scale"] == bgf.__dict__["_scale"]
            assert bgf_new.__dict__["params_"].equals(bgf.__dict__["params_"])
            assert bgf_new.__dict__["_negative_log_likelihood_"] == bgf.__dict__[
                "_negative_log_likelihood_"]
            assert bgf_new.__dict__["predict"](1, 1, 2,
                                               5) == bgf.__dict__["predict"](1, 1,
                                                                             2, 5)
            assert bgf_new.expected_number_of_purchases_up_to_time(
                1) == bgf.expected_number_of_purchases_up_to_time(1)

            # Compare by value: ``is`` against a string literal depends on
            # interning and raises a SyntaxWarning on Python 3.8+.
            assert bgf_new.__dict__["data"] == ""
        finally:
            # remove saved model
            os.remove(PATH_SAVE_BGNBD_MODEL)
    def test_using_weights_col_gives_correct_results(self, cdnow_customers):
        """Fitting on de-duplicated rows with weights matches fitting on the raw rows.

        Identical (frequency, recency, T) rows are collapsed into one row whose
        ``weights`` value counts the duplicates; the parameters fitted with
        those weights must agree with the unweighted fit to 3 decimals.
        """
        rfm_columns = ["frequency", "recency", "T"]
        param_names = ("r", "alpha", "a", "b")

        # Give every customer unit weight, then sum weights per distinct row.
        aggregated = cdnow_customers.copy()
        aggregated["weights"] = 1.0
        aggregated = aggregated.groupby(rfm_columns)["weights"].sum()
        aggregated = aggregated.reset_index()
        # The comparison is only meaningful if some rows were actually merged.
        assert (aggregated["weights"] > 1).any()

        weighted_model = lt.BetaGeoFitter(penalizer_coef=0.0)
        weighted_model.fit(
            aggregated["frequency"],
            aggregated["recency"],
            aggregated["T"],
            weights=aggregated["weights"],
        )

        plain_model = lt.BetaGeoFitter(penalizer_coef=0.0)
        plain_model.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        plain_params = np.array(plain_model._unload_params(*param_names))
        weighted_params = np.array(weighted_model._unload_params(*param_names))
        npt.assert_almost_equal(plain_params, weighted_params, decimal=3)
    def test_fit_with_index(self, cdnow_customers):
        """Check that ``fit`` uses a caller-supplied index for ``data``.

        With an explicit descending index, ``bgf.data.index`` must equal it
        element-wise; with ``index=None`` it must not.
        """
        bgf = lt.BetaGeoFitter(penalizer_coef=0.0)
        # Descending index so it cannot coincide with a default ascending one.
        index = range(len(cdnow_customers), 0, -1)
        bgf.fit(cdnow_customers["frequency"],
                cdnow_customers["recency"],
                cdnow_customers["T"],
                index=index)
        # Assert truthiness directly instead of comparing with ``== True``.
        assert (bgf.data.index == index).all()

        bgf = lt.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(cdnow_customers["frequency"],
                cdnow_customers["recency"],
                cdnow_customers["T"],
                index=None)
        assert not (bgf.data.index == index).all()
    def test_conditional_probability_alive_returns_1_if_no_repeat_purchases(
            self, cdnow_customers):
        """A customer with frequency 0 must have probability alive exactly 1.0."""
        frequency, recency, age = (
            cdnow_customers[column] for column in ("frequency", "recency", "T")
        )
        model = lt.BetaGeoFitter()
        model.fit(frequency, recency, age)

        p_alive = model.conditional_probability_alive(0, 1, 1)
        assert p_alive == 1.0
 def test_params_out_is_close_to_Hardie_paper(self, cdnow_customers):
     """Fitted (r, alpha, a, b) must match the hard-coded reference values to 2 decimals."""
     model = lt.BetaGeoFitter()
     model.fit(
         cdnow_customers["frequency"],
         cdnow_customers["recency"],
         cdnow_customers["T"],
     )

     # Reference values in the order r, alpha, a, b.
     reference = np.array([0.243, 4.414, 0.793, 2.426])
     fitted = np.array(model._unload_params("r", "alpha", "a", "b"))
     npt.assert_array_almost_equal(reference, fitted, decimal=2)
    def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
        """Every fitted parameter must get strictly smaller as ``penalizer_coef`` grows.

        Three fits are compared: the default fitter, ``penalizer_coef=0.1`` and
        ``penalizer_coef=10``.
        """

        def fit_and_get_params(**fitter_kwargs):
            # Fit a fresh model on the fixture and hand back its parameters.
            model = lt.BetaGeoFitter(**fitter_kwargs)
            model.fit(
                cdnow_customers["frequency"],
                cdnow_customers["recency"],
                cdnow_customers["T"],
            )
            return model.params_

        unpenalized = fit_and_get_params()

        lightly_penalized = fit_and_get_params(penalizer_coef=0.1)
        assert np.all(lightly_penalized < unpenalized)

        heavily_penalized = fit_and_get_params(penalizer_coef=10)
        assert np.all(heavily_penalized < lightly_penalized)
    def test_scaling_inputs_gives_same_or_similar_results(
            self, cdnow_customers):
        """Multiplying recency and T by a constant must not change probability alive.

        A second model is fitted on recency and T multiplied by ``scale``; its
        ``_scale`` must be below 1.0 and its probability-alive predictions at
        the scaled inputs must be within 10e-5 of the baseline predictions.
        """
        scale = 10

        baseline = lt.BetaGeoFitter()
        baseline.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        scaled = lt.BetaGeoFitter()
        scaled.fit(
            cdnow_customers["frequency"],
            scale * cdnow_customers["recency"],
            scale * cdnow_customers["T"],
        )
        assert scaled._scale < 1.0

        # (recency, T) probe points, evaluated at frequency 1.
        for recency, age in ((1, 2), (2, 10)):
            scaled_p = scaled.conditional_probability_alive(
                1, scale * recency, scale * age)
            baseline_p = baseline.conditional_probability_alive(
                1, recency, age)
            assert abs(scaled_p - baseline_p) < 10e-5
    def test_conditional_probability_alive_is_between_0_and_1(
            self, cdnow_customers):
        """Probability alive must lie in [0, 1] over a coarse grid of inputs.

        The grid steps frequency and recency through 0, 10, ..., 90 and T from
        the current recency up to 90.
        """
        model = lt.BetaGeoFitter()
        model.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        steps = range(0, 100, 10)
        grid = (
            (frequency, recency, age)
            for frequency in steps
            for recency in steps
            for age in range(recency, 100, 10)
        )
        for frequency, recency, age in grid:
            p_alive = model.conditional_probability_alive(
                frequency, recency, age)
            assert 0 <= p_alive <= 1.0
    def test_expectation_returns_same_value_Hardie_excel_sheet(
            self, cdnow_customers):
        """Expected purchases up to time t must match the reference table to 3 decimals."""
        model = lt.BetaGeoFitter()
        model.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
            tol=1e-6,
        )

        # (time, expected number of purchases) reference pairs.
        reference = [
            (0.1429, 0.0078),
            (1.0, 0.0532),
            (3.00, 0.1506),
            (31.8571, 1.0405),
            (32.00, 1.0437),
            (78.00, 1.8576),
        ]
        times = np.array([t for t, _ in reference])
        expected = np.array([purchases for _, purchases in reference])

        actual = model.expected_number_of_purchases_up_to_time(times)
        npt.assert_array_almost_equal(actual, expected, decimal=3)
    def test_conditional_probability_alive_matrix(self, cdnow_customers):
        """Each matrix cell must equal the scalar probability-alive call for that cell.

        The matrix is indexed as ``[recency][frequency]`` and compared against
        ``conditional_probability_alive(frequency, recency, max T in the data)``.
        """
        model = lt.BetaGeoFitter()
        model.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )
        alive_matrix = model.conditional_probability_alive_matrix()
        max_age = int(model.data["T"].max())
        assert alive_matrix[0][0] == 1

        n_rows, n_cols = alive_matrix.shape[0], alive_matrix.shape[1]
        for recency in range(n_rows):
            for frequency in range(n_cols):
                scalar_value = model.conditional_probability_alive(
                    frequency, recency, max_age)
                assert alive_matrix[recency][frequency] == scalar_value
    def test_customer_lifetime_value_with_bgf(
            self, cdnow_customers_with_monetary_value):
        """``GammaGammaFitter.customer_lifetime_value`` must equal the ``utils`` helper.

        The comparison is made twice: once with default arguments and once
        with ``freq="H"``.
        """
        customers = cdnow_customers_with_monetary_value

        # The Gamma-Gamma parameters are set by hand rather than fitted.
        ggf = lt.GammaGammaFitter()
        ggf.params_ = pd.Series({"p": 6.25, "q": 3.74, "v": 15.44})

        bgf = lt.BetaGeoFitter()
        bgf.fit(
            customers["frequency"],
            customers["recency"],
            customers["T"],
        )

        for extra_kwargs in ({}, {"freq": "H"}):
            ggf_clv = ggf.customer_lifetime_value(
                bgf,
                customers["frequency"],
                customers["recency"],
                customers["T"],
                customers["monetary_value"],
                **extra_kwargs
            )

            expected_profit = ggf.conditional_expected_average_profit(
                customers["frequency"], customers["monetary_value"])
            utils_clv = utils._customer_lifetime_value(
                bgf,
                customers["frequency"],
                customers["recency"],
                customers["T"],
                expected_profit,
                **extra_kwargs
            )
            npt.assert_equal(ggf_clv.values, utils_clv.values)
 def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(
         self, cdnow_customers):
     """Conditional expected purchases for one reference customer must be within 0.001 of 1.226."""
     model = lt.BetaGeoFitter()
     model.fit(
         cdnow_customers["frequency"],
         cdnow_customers["recency"],
         cdnow_customers["T"],
     )

     # Reference customer: x=2 purchases, t_x=30.43, T=38.86, horizon t=39.
     horizon, purchases, last_purchase, age = 39, 2, 30.43, 38.86
     predicted = model.conditional_expected_number_of_purchases_up_to_time(
         horizon, purchases, last_purchase, age)

     reference = 1.226
     assert abs(reference - predicted) < 0.001
    def test_no_runtime_warnings_high_frequency(self, cdnow_customers):
        """Fit and predict at a very high frequency with numpy FP errors raised.

        All numpy floating-point error categories are set to ``"raise"`` while
        the model is fitted and ``conditional_probability_alive`` is evaluated
        at ``frequency=1000, recency=10, T=100``; the result must be 0.0.
        """
        # ``np.errstate`` restores the previous error settings on exit even if
        # the fit or the prediction raises, so the "raise" mode cannot leak
        # into other tests.
        with np.errstate(all="raise"):
            bgf = lt.BetaGeoFitter(penalizer_coef=0.0)
            bgf.fit(cdnow_customers["frequency"],
                    cdnow_customers["recency"],
                    cdnow_customers["T"],
                    index=None)

            p_alive = bgf.conditional_probability_alive(frequency=1000,
                                                        recency=10,
                                                        T=100)
        assert p_alive == 0.0
    def test_save_load(self, cdnow_customers):
        """Test saving and loading model for BG/NBD.

        A fitted model is saved with default options and loaded into a fresh
        fitter; the fitted attributes, the stored data and the predictions of
        the loaded model must match the original.
        """
        bgf = lt.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(cdnow_customers["frequency"], cdnow_customers["recency"],
                cdnow_customers["T"])
        bgf.save_model(PATH_SAVE_BGNBD_MODEL)

        # The file exists from here on; clean it up in ``finally`` so that a
        # failing assertion does not leave it behind for later tests.
        try:
            bgf_new = lt.BetaGeoFitter()
            bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)
            assert bgf_new.__dict__["penalizer_coef"] == bgf.__dict__[
                "penalizer_coef"]
            assert bgf_new.__dict__["_scale"] == bgf.__dict__["_scale"]
            assert bgf_new.__dict__["params_"].equals(bgf.__dict__["params_"])
            assert bgf_new.__dict__["_negative_log_likelihood_"] == bgf.__dict__[
                "_negative_log_likelihood_"]
            assert (bgf_new.__dict__["data"] == bgf.__dict__["data"]).all().all()
            assert bgf_new.__dict__["predict"](1, 1, 2,
                                               5) == bgf.__dict__["predict"](1, 1,
                                                                             2, 5)
            assert bgf_new.expected_number_of_purchases_up_to_time(
                1) == bgf.expected_number_of_purchases_up_to_time(1)
        finally:
            # remove saved model
            os.remove(PATH_SAVE_BGNBD_MODEL)
Exemple #15
0
 def test_conditional_expectation_overflow_error_with_high_frequency(
         self, cdnow_customers):
     """Conditional expectation at frequency 1000 must not evaluate to NaN.

     The model parameters are assigned directly instead of being fitted.
     """
     fixed_params = [
         ('r', 0.5458741247391189),
         ('alpha', 13.409316394557274),
         ('a', 0.0009994943799344323),
         ('b', 0.03899022143378801),
     ]
     model = lt.BetaGeoFitter()
     model.params_ = OrderedDict(fixed_params)

     # Arguments: horizon t=180, frequency=1000, t_x=0, T=5.
     result = model.conditional_expected_number_of_purchases_up_to_time(
         180, 1000, 0, 5)
     as_float_array = np.array([result], dtype=np.float64)
     assert not np.isnan(as_float_array)
Exemple #16
0
 def test_conditional_expectation_with_negative_hyp2f1_term(
         self, cdnow_customers):
     """Conditional expectation for x=0, t_x=0, T=5, t=180 must be within 0.001 of 5.212.

     The model parameters are assigned directly instead of being fitted.
     """
     fixed_params = [
         ('r', 0.5458741247391189),
         ('alpha', 13.409316394557274),
         ('a', 0.0009994943799344323),
         ('b', 0.03899022143378801),
     ]
     model = lt.BetaGeoFitter()
     model.params_ = OrderedDict(fixed_params)

     horizon, purchases, last_purchase, age = 180, 0, 0, 5
     predicted = model.conditional_expected_number_of_purchases_up_to_time(
         horizon, purchases, last_purchase, age)

     reference = 5.212
     assert abs(reference - predicted) < 0.001
 def test_probability_of_n_purchases_up_to_time_same_as_R_BTYD(self):
     """Compare ``probability_of_n_purchases_up_to_time`` with hard-coded reference values.

     See https://cran.r-project.org/web/packages/BTYD/BTYD.pdf
     """
     model = lt.BetaGeoFitter()
     model.params_ = pd.Series({
         "r": 0.243,
         "alpha": 4.414,
         "a": 0.793,
         "b": 2.426
     })

     # (t, n, reference probability of n repeat transactions in (0, t]).
     # NOTE(review): the 10e-5 tolerance is larger than the first reference
     # value (1.07869e-07), so that check cannot tell the result from zero.
     point_checks = (
         (2, 10, 1.07869e-07),
         (39, 0, 0.5737864),
     )
     for horizon, n_purchases, reference in point_checks:
         probability = model.probability_of_n_purchases_up_to_time(
             horizon, n_purchases)
         assert abs(reference - probability) < 10e-5

     # PMF for n = 11..20 at t = 30.
     reference_pmf = np.array([
         0.0019995214,
         0.0015170236,
         0.0011633150,
         0.0009003148,
         0.0007023638,
         0.0005517902,
         0.0004361913,
         0.0003467171,
         0.0002769613,
         0.0002222260,
     ])
     computed_pmf = np.array([
         model.probability_of_n_purchases_up_to_time(30, n_purchases)
         for n_purchases in range(11, 21)
     ])
     npt.assert_array_almost_equal(reference_pmf, computed_pmf, decimal=5)
Exemple #18
0
        calibration_period_end=training_end,
        observation_period_end=validation_end,
        monetary_value_col='Sales Total')

    rfm_train_test = rfm_train_test.loc[rfm_train_test['frequency_cal'] > 0, :]
    train = rfm_train_test[['frequency_cal', 'recency_cal', 'T_cal']]
    test = rfm_train_test[['frequency_holdout', 'duration_holdout']]
    print(rfm_train_test.head())
    print(rfm_train_test.shape)

    # ---------------------------------------------------------------------------------------------------------------------------

    # TRAIN
    # -------------------------------------------------------------------------------------------------------------------------
    #Beta Geometric / Negative Binomial distribution model (BG/NBD) to predict transactions (Frequency) and churn (Recency)')
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(rfm_train_test['frequency_cal'], rfm_train_test['recency_cal'],
            rfm_train_test['T_cal'])
    print(bgf.summary)

    lifetimes.plotting.plot_calibration_purchases_vs_holdout_purchases(
        bgf, rfm_train_test)
    plt.savefig('../images/split.png')
    plt.show()

    #_________________________________________________________________________________________________________________________

    #PREDICT
    # --------------------------------------------------------------------------------------------------------------------------
    # Probability Alive
    alive_prediction_bgf = bgf.conditional_probability_alive(