Ejemplo n.º 1
0
    def test_fit_with_index(self, cdnow_customers):
        """Check that ``fit`` stores a caller-supplied index on ``data``.

        The model is fitted twice: once with an explicit descending index and
        once with ``index=None``. Only the first fit's ``data.index`` is
        expected to match the supplied index element-wise.
        """
        # Descending labels len(cdnow_customers)..1.
        index = range(len(cdnow_customers), 0, -1)

        mbgf = lt.ModifiedBetaGeoFitter()
        mbgf.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
            index=index,
        )
        # Truth-test the reduction directly instead of comparing to True.
        assert (mbgf.data.index == index).all()

        mbgf = lt.ModifiedBetaGeoFitter()
        mbgf.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
            index=None,
        )
        # With no index supplied, the stored index must not match ours.
        assert not (mbgf.data.index == index).all()
Ejemplo n.º 2
0
    def test_conditional_probability_alive_returns_lessthan_1_if_no_repeat_purchases(
            self, cdnow_customers):
        """A customer with zero repeat purchases must have P(alive) below 1."""
        fitter = lt.ModifiedBetaGeoFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        # First argument 0 is the "no repeat purchases" case named in the test.
        p_alive = fitter.conditional_probability_alive(0, 1, 1)
        assert p_alive < 1.0
Ejemplo n.º 3
0
    def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(
            self):
        """Thirty-day predictions should agree for daily and hourly summaries.

        The same transaction log is summarised at daily ("D") and hourly ("h")
        frequency, one fitter is fitted on each summary in turn, and the
        expected number of purchases over thirty days (expressed in the
        summary's own time unit) is compared with ``assert_almost_equal``.
        """
        transactions = load_transaction_data(parse_dates=["date"])

        def summarise(freq):
            # Summary of the shared transaction log at the requested frequency.
            return utils.summary_data_from_transaction_data(
                transactions,
                "id",
                "date",
                observation_period_end=max(transactions.date),
                freq=freq)

        # Both summaries are built up front, daily first, before any fitting.
        summaries = {freq: summarise(freq) for freq in ("D", "h")}
        # Thirty days, in days and in hours respectively.
        horizons = {"D": 30, "h": 30 * 24}

        fitter = lt.ModifiedBetaGeoFitter()
        predictions = {}
        for freq in ("D", "h"):
            summary = summaries[freq]
            # Re-seed before each fit so both fits start from the same RNG state.
            np.random.seed(0)
            fitter.fit(summary["frequency"], summary["recency"], summary["T"])
            predictions[freq] = fitter.expected_number_of_purchases_up_to_time(
                horizons[freq])

        npt.assert_almost_equal(predictions["D"], predictions["h"])
Ejemplo n.º 4
0
 def test_probability_of_n_purchases_up_to_time_same_as_R_BTYD(self):
     """Compare ``probability_of_n_purchases_up_to_time`` with reference values.

     See https://cran.r-project.org/web/packages/BTYD/BTYD.pdf
     """
     fitter = lt.ModifiedBetaGeoFitter()
     # Parameters are assigned directly; no fit is run in this test.
     fitter.params_ = pd.Series(
         {"r": 0.243, "alpha": 4.414, "a": 0.793, "b": 2.426})

     # Probability that a customer will make 10 repeat transactions in the
     # time interval (0, 2].
     reference_point = 1.07869e-07
     computed_point = fitter.probability_of_n_purchases_up_to_time(2, 10)
     # NOTE(review): the absolute tolerance 10e-5 (= 1e-4) is far larger than
     # the reference value itself -- confirm whether this is intentional.
     assert abs(reference_point - computed_point) < 10e-5

     # PMF for n = 11..20 with the time argument set to 30.
     reference_pmf = np.array([
         0.0019995214, 0.0015170236, 0.0011633150, 0.0009003148,
         0.0007023638, 0.0005517902, 0.0004361913, 0.0003467171,
         0.0002769613, 0.0002222260,
     ])
     computed_pmf = np.array(
         [fitter.probability_of_n_purchases_up_to_time(30, n)
          for n in range(11, 21)])
     npt.assert_allclose(reference_pmf, computed_pmf, rtol=0.5)
Ejemplo n.º 5
0
    def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
        """A larger ``penalizer_coef`` must yield a smaller sum of parameters.

        Three fitters are fitted on the same data: one constructed with no
        arguments, one with ``penalizer_coef=0.1`` and one with
        ``penalizer_coef=1.0``. The sum of ``params_`` must strictly decrease
        from each to the next.
        """
        def fit_on_customers(fitter):
            # Fit the given fitter on the shared fixture and hand back params_.
            fitter.fit(cdnow_customers["frequency"],
                       cdnow_customers["recency"],
                       cdnow_customers["T"])
            return fitter.params_

        unpenalized = fit_on_customers(lt.ModifiedBetaGeoFitter())

        lightly_penalized = fit_on_customers(
            lt.ModifiedBetaGeoFitter(penalizer_coef=0.1))
        assert lightly_penalized.sum() < unpenalized.sum()

        heavily_penalized = fit_on_customers(
            lt.ModifiedBetaGeoFitter(penalizer_coef=1.0))
        assert heavily_penalized.sum() < lightly_penalized.sum()
Ejemplo n.º 6
0
 def test_params_out_is_close_to_BTYDplus(self, cdnow_customers):
     """Fitted (r, alpha, a, b) should match the reference to 3 decimals.

     See https://github.com/mplatzer/BTYDplus
     """
     fitter = lt.ModifiedBetaGeoFitter()
     fitter.fit(
         cdnow_customers["frequency"],
         cdnow_customers["recency"],
         cdnow_customers["T"],
     )

     # Reference values, in the order r, alpha, a, b.
     reference_params = np.array([0.525, 6.183, 0.891, 1.614])
     fitted_params = np.array(fitter._unload_params("r", "alpha", "a", "b"))
     npt.assert_array_almost_equal(reference_params, fitted_params, decimal=3)
Ejemplo n.º 7
0
    def test_conditional_probability_alive_is_between_0_and_1(
            self, cdnow_customers):
        """P(alive) must lie in [0, 1] across a grid of inputs.

        The grid steps each argument through 0, 10, ..., 90; the third
        argument starts at the value of the second.
        """
        fitter = lt.ModifiedBetaGeoFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        steps = range(0, 100, 10)
        for x in steps:
            for t_x in steps:
                # The third argument never goes below the second.
                for T in range(t_x, 100, 10):
                    p_alive = fitter.conditional_probability_alive(x, t_x, T)
                    assert 0 <= p_alive <= 1.0
Ejemplo n.º 8
0
    def test_scaling_inputs_gives_same_or_similar_results(
            self, cdnow_customers):
        """Multiplying recency and T by a constant should barely move P(alive).

        One fitter is fitted on the raw fixture and another on recency and T
        multiplied by 10. The second fitter's ``_scale`` must be below 1, and
        P(alive) at correspondingly scaled inputs must differ by less than
        0.1 from the unscaled result.
        """
        scale = 10.0
        frequency = cdnow_customers["frequency"]
        recency = cdnow_customers["recency"]
        age = cdnow_customers["T"]

        baseline = lt.ModifiedBetaGeoFitter()
        baseline.fit(frequency, recency, age)

        scaled = lt.ModifiedBetaGeoFitter()
        scaled.fit(frequency, scale * recency, scale * age)
        assert scaled._scale < 1.0

        # (t_x, T) pairs checked at frequency 1.
        for t_x, T in ((1, 2), (2, 10)):
            from_scaled = scaled.conditional_probability_alive(
                1, scale * t_x, scale * T)
            from_baseline = baseline.conditional_probability_alive(1, t_x, T)
            assert abs(from_scaled - from_baseline) < 10e-2
Ejemplo n.º 9
0
    def test_conditional_probability_alive_matrix(self, cdnow_customers):
        """Every matrix cell must equal the scalar P(alive) for its indices.

        Cell ``[t_x][x]`` of ``conditional_probability_alive_matrix()`` is
        compared for exact equality with
        ``conditional_probability_alive(x, t_x, max_t)``, where ``max_t`` is
        the integer part of the largest ``T`` in the fitted data.
        """
        fitter = lt.ModifiedBetaGeoFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
        )

        alive_matrix = fitter.conditional_probability_alive_matrix()
        max_t = int(fitter.data["T"].max())

        row_count = alive_matrix.shape[0]
        column_count = alive_matrix.shape[1]
        for t_x in range(row_count):
            for x in range(column_count):
                scalar_value = fitter.conditional_probability_alive(
                    x, t_x, max_t)
                assert alive_matrix[t_x][x] == scalar_value
Ejemplo n.º 10
0
    def test_expectation_returns_same_value_Hardie_excel_sheet(
            self, cdnow_customers):
        """Expected purchases over time should match reference values within 5%."""
        fitter = lt.ModifiedBetaGeoFitter()
        fitter.fit(
            cdnow_customers["frequency"],
            cdnow_customers["recency"],
            cdnow_customers["T"],
            tol=1e-6,
        )

        # (time, reference expected number of purchases) pairs.
        reference_points = [
            (0.1429, 0.0078),
            (1.0, 0.0532),
            (3.00, 0.1506),
            (31.8571, 1.0405),
            (32.00, 1.0437),
            (78.00, 1.8576),
        ]
        times = np.array([time for time, _ in reference_points])
        expected = np.array([value for _, value in reference_points])

        actual = fitter.expected_number_of_purchases_up_to_time(times)
        npt.assert_allclose(actual, expected, rtol=0.05)
Ejemplo n.º 11
0
 def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(
         self, cdnow_customers):
     """Conditional expected purchases should be within 0.05 of the reference."""
     fitter = lt.ModifiedBetaGeoFitter()
     fitter.fit(
         cdnow_customers["frequency"],
         cdnow_customers["recency"],
         cdnow_customers["T"],
     )

     reference = 1.226
     # Positional arguments are, in order: t=39, x=2, t_x=30.43, T=38.86.
     computed = fitter.conditional_expected_number_of_purchases_up_to_time(
         39, 2, 30.43, 38.86)
     assert abs(reference - computed) < 0.05