def test_fit_with_and_without_weights(self, donations):
    """Fitting weighted (pre-aggregated) data must recover the same
    parameters as fitting the equivalent one-row-per-customer data.

    Each row of ``donations`` is expanded into ``weights`` identical
    rows, and both datasets are fit with ``BetaGeoBetaBinomFitter``.
    """
    # Expand every aggregated row into `weights` duplicate rows.
    # NOTE: the original used DataFrame.append in a loop, which was
    # removed in pandas 2.0 (and is quadratic); pd.concat builds the
    # exploded frame in one pass. A leftover debug `to_csv` was removed.
    exploded_dataset = pd.concat(
        [
            pd.DataFrame(
                [[row["frequency"], row["recency"], row["periods"]]]
                * row["weights"],
                columns=["frequency", "recency", "periods"],
            )
            for _, row in donations.iterrows()
        ],
        ignore_index=True,
    ).astype(np.int64)

    # Sanity check: the exploded data has one row per weighted customer.
    assert exploded_dataset.shape[0] == donations["weights"].sum()

    bbtf_noweights = lt.BetaGeoBetaBinomFitter()
    bbtf_noweights.fit(
        exploded_dataset["frequency"],
        exploded_dataset["recency"],
        exploded_dataset["periods"],
    )

    bbtf = lt.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
    )

    npt.assert_array_almost_equal(
        np.array(bbtf_noweights._unload_params("alpha", "beta", "gamma", "delta")),
        np.array(bbtf._unload_params("alpha", "beta", "gamma", "delta")),
        decimal=4,
    )
def test_model_has_standard_error_variance_matrix_and_confidence_intervals_(
        self, donations):
    """After fitting, the model must expose its uncertainty estimates.

    Removed the no-op ``donations = donations`` statement from the
    original.
    """
    bbtf = lt.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
    )
    assert hasattr(bbtf, "standard_errors_")
    assert hasattr(bbtf, "variance_matrix_")
    assert hasattr(bbtf, "confidence_intervals_")
def test_fit_with_index(self, donations):
    """A user-supplied index is preserved on ``data``; with ``index=None``
    the default index is used and differs from the custom one.

    Fixed the non-idiomatic ``== True`` / ``== False`` comparisons
    (PEP 8 / E712): the boolean result of ``.all()`` is asserted directly.
    """
    bbtf = lt.BetaGeoBetaBinomFitter()
    index = range(len(donations), 0, -1)
    bbtf.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
        index=index,
    )
    assert (bbtf.data.index == index).all()

    bbtf = lt.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
        index=None,
    )
    # With the default index, at least one position differs from the
    # reversed custom index above.
    assert not (bbtf.data.index == index).all()
def test_params_out_is_close_to_Hardie_paper(self, donations):
    """Fitted (alpha, beta, gamma, delta) should match the values
    published in Hardie's BG/BB paper to 2 decimal places.

    Removed the no-op ``donations = donations`` statement from the
    original.
    """
    bbtf = lt.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
    )
    expected = np.array([1.204, 0.750, 0.657, 2.783])
    npt.assert_array_almost_equal(
        expected,
        np.array(bbtf._unload_params("alpha", "beta", "gamma", "delta")),
        decimal=2,
    )
def test_expected_purchases_in_n_periods_returns_same_value_as_Hardie_excel_sheet(
        self, donations):
    """Totals should reproduce Hardie's In-Sample Fit spreadsheet."""
    model = lt.BetaGeoBetaBinomFitter()
    model.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
    )
    # Reference values taken from cells C18 and C24 of the sheet.
    expected = np.array([3454.9, 1253.1])
    table = model.expected_number_of_transactions_in_first_n_periods(6)
    estimated = table.loc[[0, 6]].values.flatten()
    npt.assert_almost_equal(expected, estimated, decimal=0)
def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(
        self, donations):
    """Weighted conditional expectations should total the value in
    Hardie's Conditional Expectations (II) sheet.

    http://brucehardie.com/notes/010/BGBB_2011-01-20_XLSX.zip
    """
    model = lt.BetaGeoBetaBinomFitter()
    model.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
    )
    per_customer = model.conditional_expected_number_of_purchases_up_to_time(
        5,
        donations["frequency"],
        donations["recency"],
        donations["periods"],
    )
    pred_purchases = per_customer * donations["weights"]
    # Sum of column F, "Exp Tot", in the spreadsheet.
    expected = 12884.2
    npt.assert_almost_equal(expected, pred_purchases.sum(), decimal=0)
def test_prob_alive_is_close_to_Hardie_paper_table_6(self, donations):
    """Table 6: P(Alive in 2002) as a Function of Recency and Frequency"""
    model = lt.BetaGeoBetaBinomFitter()
    model.fit(
        donations["frequency"],
        donations["recency"],
        donations["periods"],
        donations["weights"],
    )
    model.data["prob_alive"] = model.conditional_probability_alive(
        1,
        donations["frequency"],
        donations["recency"],
        donations["periods"],
    )

    # (frequency, recency) cells checked against Table 6 of the paper:
    # 1995-0 repeat, 1999-2 repeat and 2001-6 repeat donors.
    cells = [(0, 0), (2, 4), (6, 6)]
    expected = np.array([0.11, 0.59, 0.93])

    prob_list = np.zeros(len(cells))
    for i, (freq, rec) in enumerate(cells):
        mask = (model.data["frequency"] == freq) & (model.data["recency"] == rec)
        prob_list[i] = model.data[mask]["prob_alive"]

    npt.assert_array_almost_equal(expected, prob_list, decimal=2)