Esempio n. 1
0
    def test_scaling_inputs_gives_same_or_similar_results(self, cdnow_customers):
        """Check that multiplying recency and T by a constant barely moves P(alive).

        One fitter is trained on the data as given, a second one on recency
        and T multiplied by 10 (with ``iterative_fitting=2``). The second
        fitter must report ``_scale < 1`` and its alive probabilities at the
        scaled inputs must lie within 0.1 of the first fitter's values at the
        unscaled inputs.
        """
        scale = 10.
        frequency = cdnow_customers['frequency']
        recency = cdnow_customers['recency']
        T = cdnow_customers['T']

        baseline = estimation.ModifiedBetaGeoFitter()
        baseline.fit(frequency, recency, T)

        rescaled = estimation.ModifiedBetaGeoFitter()
        rescaled.fit(frequency, scale * recency, scale * T, iterative_fitting=2)
        assert rescaled._scale < 1.

        tolerance = 10e-2  # note: 10e-2 is 0.1, not 0.01
        # (first argument, unscaled second argument, unscaled third argument)
        for x, t_x, age in ((1, 1, 2), (1, 2, 10)):
            p_rescaled = rescaled.conditional_probability_alive(x, scale * t_x, scale * age)
            p_baseline = baseline.conditional_probability_alive(x, t_x, age)
            assert abs(p_rescaled - p_baseline) < tolerance
Esempio n. 2
0
    def test_fit_method_allows_for_better_accuracy_by_using_iterative_fitting(self, cdnow_customers):
        """Check that ``iterative_fitting=5`` never ends with a worse likelihood.

        Both fitters see the same data and the NumPy global seed is reset to 0
        before each fit; the multi-pass fit must end with a negative
        log-likelihood no larger than the default fit's.
        """
        columns = [cdnow_customers[name] for name in ('frequency', 'recency', 'T')]
        single_pass = estimation.ModifiedBetaGeoFitter()
        multi_pass = estimation.ModifiedBetaGeoFitter()

        np.random.seed(0)
        single_pass.fit(*columns)

        np.random.seed(0)
        multi_pass.fit(*columns, iterative_fitting=5)

        default_nll = single_pass._negative_log_likelihood_
        iterative_nll = multi_pass._negative_log_likelihood_
        assert default_nll >= iterative_nll
Esempio n. 3
0
    def test_mgbf_does_not_hang_for_small_datasets_but_can_be_improved_with_iterative_fitting(self, cdnow_customers):
        """Fit on a tiny slice of the data and compare default vs. iterative fitting.

        Both fitters are trained on the same reduced dataset with the NumPy
        global seed reset to 0 before each fit. The fit using
        ``iterative_fitting=10`` must end with a negative log-likelihood that
        is no larger than the one from the default fit.
        """
        # ``DataFrame.ix`` was removed in pandas 1.0; use positional slicing to
        # take the first two rows regardless of how the frame is indexed.
        reduced_dataset = cdnow_customers.iloc[:2]
        mbfg1 = estimation.ModifiedBetaGeoFitter()
        mbfg2 = estimation.ModifiedBetaGeoFitter()

        np.random.seed(0)
        mbfg1.fit(reduced_dataset['frequency'], reduced_dataset['recency'], reduced_dataset['T'])

        np.random.seed(0)
        mbfg2.fit(reduced_dataset['frequency'], reduced_dataset['recency'], reduced_dataset['T'], iterative_fitting=10)
        assert mbfg1._negative_log_likelihood_ >= mbfg2._negative_log_likelihood_
Esempio n. 4
0
    def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
        """Check that a larger ``penalizer_coef`` yields a smaller parameter sum.

        Three fitters are trained on the same data: no penalizer, 0.1 (with
        ``iterative_fitting=3``) and 1.0 (with ``iterative_fitting=5``). The
        sum of the fitted ``params_`` values must strictly decrease from one
        to the next.
        """
        frequency = cdnow_customers['frequency']
        recency = cdnow_customers['recency']
        T = cdnow_customers['T']

        unpenalized = estimation.ModifiedBetaGeoFitter()
        unpenalized.fit(frequency, recency, T)
        unpenalized_total = np.array(list(unpenalized.params_.values())).sum()

        lightly_penalized = estimation.ModifiedBetaGeoFitter(penalizer_coef=0.1)
        lightly_penalized.fit(frequency, recency, T, iterative_fitting=3)
        light_total = np.array(list(lightly_penalized.params_.values())).sum()
        assert light_total < unpenalized_total

        heavily_penalized = estimation.ModifiedBetaGeoFitter(penalizer_coef=1.)
        heavily_penalized.fit(frequency, recency, T, iterative_fitting=5)
        heavy_total = np.array(list(heavily_penalized.params_.values())).sum()
        assert heavy_total < light_total
Esempio n. 5
0
    def test_fit_with_index(self, cdnow_customers):
        """Check that ``fit`` honours the ``index`` keyword argument.

        When a descending range is passed as ``index``, the fitter's
        ``data.index`` must match it element-wise. When ``index=None`` is
        passed, ``data.index`` must not fully match that same range.
        """
        mbgf = estimation.ModifiedBetaGeoFitter()
        index = range(len(cdnow_customers), 0, -1)
        mbgf.fit(cdnow_customers['frequency'],
                 cdnow_customers['recency'],
                 cdnow_customers['T'],
                 index=index)
        # Assert on truthiness directly rather than comparing to True/False.
        assert (mbgf.data.index == index).all()

        mbgf = estimation.ModifiedBetaGeoFitter()
        mbgf.fit(cdnow_customers['frequency'],
                 cdnow_customers['recency'],
                 cdnow_customers['T'],
                 index=None)
        assert not (mbgf.data.index == index).all()
Esempio n. 6
0
    def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(
            self):
        """Compare a 30-day purchase forecast from daily vs. hourly summaries.

        The same transaction data is summarised with ``freq='D'`` and
        ``freq='h'``. A single fitter is fitted on each summary in turn (NumPy
        seed reset to 0 before each fit) and asked for the expected number of
        purchases up to 30 (daily) and 30 * 24 (hourly) time units. The two
        results must agree under ``npt.assert_almost_equal``.
        """
        days = 30
        hours_per_day = 24

        transactions = load_transaction_data(parse_dates=['date'])
        period_end = max(transactions.date)

        daily = utils.summary_data_from_transaction_data(
            transactions, 'id', 'date',
            observation_period_end=period_end, freq='D')
        hourly = utils.summary_data_from_transaction_data(
            transactions, 'id', 'date',
            observation_period_end=period_end, freq='h')

        # The same fitter instance is deliberately re-fitted on the second summary.
        fitter = estimation.ModifiedBetaGeoFitter()

        np.random.seed(0)
        fitter.fit(daily['frequency'], daily['recency'], daily['T'])
        forecast_from_daily = fitter.expected_number_of_purchases_up_to_time(days)

        np.random.seed(0)
        fitter.fit(hourly['frequency'], hourly['recency'], hourly['T'])
        forecast_from_hourly = fitter.expected_number_of_purchases_up_to_time(
            days * hours_per_day)

        npt.assert_almost_equal(forecast_from_daily, forecast_from_hourly)
Esempio n. 7
0
 def test_probability_of_n_purchases_up_to_time_same_as_R_BTYD(self):
     """Compare purchase-count probabilities against hard-coded reference values.

     See https://cran.r-project.org/web/packages/BTYD/BTYD.pdf

     The fitter is not fitted; its ``params_`` are set by hand. One single
     probability and a ten-point PMF (n = 11..20 at time 30) are then checked
     against the reference numbers below.
     """
     from collections import OrderedDict

     fitter = estimation.ModifiedBetaGeoFitter()
     hand_set_params = OrderedDict({
         'r': 0.243,
         'alpha': 4.414,
         'a': 0.793,
         'b': 2.426
     })
     fitter.params_ = hand_set_params

     # probability that a customer will make 10 repeat transactions in the
     # time interval (0,2]
     reference_point = 1.07869e-07
     computed_point = fitter.probability_of_n_purchases_up_to_time(2, 10)
     # NOTE(review): the tolerance 10e-5 (= 1e-4) is far larger than the
     # reference value itself, so this only bounds the result's magnitude.
     assert abs(reference_point - computed_point) < 10e-5

     # PMF
     reference_pmf = np.array([
         0.0019995214, 0.0015170236, 0.0011633150, 0.0009003148,
         0.0007023638, 0.0005517902, 0.0004361913, 0.0003467171,
         0.0002769613, 0.0002222260
     ])
     computed_values = []
     for n in range(11, 21):
         computed_values.append(fitter.probability_of_n_purchases_up_to_time(30, n))
     computed_pmf = np.array(computed_values)
     npt.assert_allclose(reference_pmf, computed_pmf, rtol=0.5)
Esempio n. 8
0
    def test_conditional_probability_alive_returns_lessthan_1_if_no_repeat_purchases(
            self, cdnow_customers):
        """Check that P(alive) is strictly below 1 for the arguments (0, 1, 1)."""
        fitter = estimation.ModifiedBetaGeoFitter()
        frequency = cdnow_customers['frequency']
        recency = cdnow_customers['recency']
        T = cdnow_customers['T']
        fitter.fit(frequency, recency, T)

        p_alive = fitter.conditional_probability_alive(0, 1, 1)
        assert p_alive < 1.0
Esempio n. 9
0
    def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
        """Check that P(alive) stays inside [0, 1] over a coarse grid of inputs.

        The first and second arguments each run over 0, 10, ..., 90; the third
        argument runs from the second argument's value up to 90 in steps of 10.
        """
        fitter = estimation.ModifiedBetaGeoFitter()
        fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

        upper, step = 100, 10
        for first in range(0, upper, step):
            for second in range(0, upper, step):
                # The third argument never drops below the second one.
                for third in range(second, upper, step):
                    p_alive = fitter.conditional_probability_alive(first, second, third)
                    assert 0 <= p_alive <= 1.0
Esempio n. 10
0
    def test_expectation_returns_same_value_Hardie_excel_sheet(self, cdnow_customers):
        """Compare expected purchase counts with hard-coded reference values.

        The fitter is trained with ``tol=1e-6`` and ``iterative_fitting=3``;
        its expected number of purchases at six time points must match the
        reference numbers to within a 5% relative tolerance.
        """
        fitter = estimation.ModifiedBetaGeoFitter()
        fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
            tol=1e-6,
            iterative_fitting=3,
        )

        horizons = np.array([0.1429, 1.0, 3.00, 31.8571, 32.00, 78.00])
        reference = np.array([0.0078, 0.0532, 0.1506, 1.0405, 1.0437, 1.8576])
        predicted = fitter.expected_number_of_purchases_up_to_time(horizons)
        npt.assert_allclose(predicted, reference, rtol=0.05)
Esempio n. 11
0
    def test_conditional_probability_alive_matrix(self, cdnow_customers):
        """Check every matrix entry against the pointwise P(alive) call.

        Entry ``[t_x][x]`` of ``conditional_probability_alive_matrix()`` must
        equal ``conditional_probability_alive(x, t_x, max_t)``, where ``max_t``
        is the integer-truncated maximum of the fitter's ``data['T']``.
        """
        fitter = estimation.ModifiedBetaGeoFitter()
        fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
        alive_matrix = fitter.conditional_probability_alive_matrix()
        horizon = int(fitter.data['T'].max())

        for row_idx in range(alive_matrix.shape[0]):
            for col_idx in range(alive_matrix.shape[1]):
                pointwise = fitter.conditional_probability_alive(col_idx, row_idx, horizon)
                assert alive_matrix[row_idx][col_idx] == pointwise
Esempio n. 12
0
 def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(self, cdnow_customers):
     """Compare one conditional purchase forecast with a hard-coded reference.

     After fitting on the fixture data, the conditional expected number of
     purchases for ``t=39, x=2, t_x=30.43, T=38.86`` must be within 0.05 of
     the reference value 1.226.
     """
     fitter = estimation.ModifiedBetaGeoFitter()
     fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

     horizon = 39
     x, t_x, T = 2, 30.43, 38.86
     reference = 1.226

     predicted = fitter.conditional_expected_number_of_purchases_up_to_time(horizon, x, t_x, T)
     assert abs(reference - predicted) < 0.05
Esempio n. 13
0
 def test_params_out_is_close_to_BTYDplus(self, cdnow_customers):
     """Compare the fitted r, alpha, a, b with hard-coded reference values.

     See https://github.com/mplatzer/BTYDplus

     The fitter is trained with ``iterative_fitting=3`` and the four
     parameters must match the reference numbers to 3 decimals.
     """
     fitter = estimation.ModifiedBetaGeoFitter()
     fitter.fit(cdnow_customers['frequency'],
                cdnow_customers['recency'],
                cdnow_customers['T'],
                iterative_fitting=3)

     reference = np.array([0.525, 6.183, 0.891, 1.614])
     fitted = np.array(fitter._unload_params('r', 'alpha', 'a', 'b'))
     npt.assert_array_almost_equal(reference, fitted, decimal=3)