def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(self):
    """Compare a 30-day purchase forecast built from daily vs. hourly summaries.

    The transaction data is summarised once with ``freq='D'`` and once with
    ``freq='h'``. A single ``ModifiedBetaGeoFitter`` is fitted on each summary
    in turn (reseeding NumPy before every fit) and the expected number of
    purchases over the same horizon -- 30 time units for the daily fit,
    30 * 24 for the hourly fit -- must be almost equal.
    """
    horizon_in_days = 30
    hours_per_day = 24

    transactions = load_transaction_data(parse_dates=['date'])
    last_observed = max(transactions.date)

    def summarise(freq):
        # Same columns and observation end for both granularities.
        return utils.summary_data_from_transaction_data(
            transactions, 'id', 'date',
            observation_period_end=last_observed,
            freq=freq)

    summary_by_day = summarise('D')
    summary_by_hour = summarise('h')

    fitter = estimation.ModifiedBetaGeoFitter()

    def fit_then_predict(summary, horizon):
        # Reseed before each fit, exactly as the two fits are seeded identically.
        np.random.seed(0)
        fitter.fit(summary['frequency'], summary['recency'], summary['T'])
        return fitter.expected_number_of_purchases_up_to_time(horizon)

    prediction_from_days = fit_then_predict(summary_by_day, horizon_in_days)
    prediction_from_hours = fit_then_predict(summary_by_hour, horizon_in_days * hours_per_day)

    npt.assert_almost_equal(prediction_from_days, prediction_from_hours)
def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self):
    """Plot calibration vs. holdout purchases with ``kind='time_since_last_purchase'``.

    Builds the calibration/holdout summary from the transaction data, fits the
    module-level ``bgf`` on the calibration columns, draws the plot on a fresh
    figure and returns that figure.
    """
    cal_holdout = utils.calibration_and_holdout_data(
        load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31')

    calibration_columns = ('frequency_cal', 'recency_cal', 'T_cal')
    bgf.fit(*[cal_holdout[column] for column in calibration_columns])

    plt.figure()
    plotting.plot_calibration_purchases_vs_holdout_purchases(
        bgf, cal_holdout, kind='time_since_last_purchase')
    figure = plt.gcf()
    return figure
def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self):
    """Draw the calibration-vs-holdout plot keyed on time since last purchase.

    The module-level ``bgf`` is refitted on the calibration columns of the
    calibration/holdout summary before plotting. The current matplotlib
    figure is returned to the caller.
    """
    transactions = load_transaction_data()
    split_summary = utils.calibration_and_holdout_data(
        transactions, "id", "date", "2014-09-01", "2014-12-31"
    )

    frequency = split_summary["frequency_cal"]
    recency = split_summary["recency_cal"]
    age = split_summary["T_cal"]
    bgf.fit(frequency, recency, age)

    plt.figure()
    plotting.plot_calibration_purchases_vs_holdout_purchases(
        bgf, split_summary, kind="time_since_last_purchase"
    )
    return plt.gcf()
def test_plot_customer_alive_history(self):
    """Plot the alive-probability history for one customer and return the figure.

    Selects the transactions of customer 35 from the transaction data and
    passes them to ``plotting.plot_history_alive`` together with the
    module-level ``bgf``. The current matplotlib figure is returned.
    """
    plt.figure()
    transaction_data = load_transaction_data()
    # yes I know this is using the wrong data, but I'm testing plotting here.
    customer_id = 35  # renamed from `id` so the builtin is not shadowed
    days_since_birth = 200
    # Boolean-mask row selection through .loc; the legacy .ix indexer has been
    # removed from pandas and raises AttributeError on current versions.
    sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]
    plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
    return plt.gcf()
def test_plot_calibration_purchases_vs_holdout_purchases(self):
    """Fit ``bgf`` on calibration data and show the calibration-vs-holdout plot.

    matplotlib is imported locally; the plot is drawn on a new figure and
    displayed with ``plt.show()``. Nothing is returned.
    """
    from matplotlib import pyplot as plt

    cal_holdout = utils.calibration_and_holdout_data(
        load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31')

    fit_inputs = [cal_holdout[name] for name in ('frequency_cal', 'recency_cal', 'T_cal')]
    bgf.fit(*fit_inputs)

    plt.figure()
    plotting.plot_calibration_purchases_vs_holdout_purchases(bgf, cal_holdout)
    plt.show()
def test_plot_customer_alive_history(self):
    """Plot and show the alive-probability history for one customer.

    Selects the transactions of customer 35 from the transaction data and
    passes them to ``plotting.plot_history_alive`` together with the
    module-level ``bgf``. The figure is displayed with ``plt.show()``;
    nothing is returned.
    """
    from matplotlib import pyplot as plt

    transaction_data = load_transaction_data()
    # yes I know this is using the wrong data, but I'm testing plotting here.
    customer_id = 35  # renamed from `id` so the builtin is not shadowed
    days_since_birth = 200
    # Boolean-mask row selection through .loc; the legacy .ix indexer has been
    # removed from pandas and raises AttributeError on current versions.
    sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]
    plt.figure()
    plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
    plt.show()
def test_plot_customer_alive_history(self, bgf):
    """Check the line data, legend, title and axis labels of the alive-history plot.

    The plot is produced by ``plotting.plot_history_alive`` for customer 35 with
    ``days_since_birth = 200``. The first line on the returned axes must match
    the expected daily dates exactly and the expected probabilities to within
    an absolute tolerance of 0.01.
    """
    from datetime import datetime, timedelta

    expected_dates = np.arange(datetime(2014, 6, 30), datetime(2015, 1, 17), timedelta(days=1))
    # One expected value per day in `expected_dates`, ten per row.
    expected_p_alive = [
        1.0, 1.0, 1.0, 0.75, 0.72, 0.69, 0.67, 0.64, 0.62, 0.59,
        0.57, 0.55, 0.81, 0.79, 0.77, 0.75, 0.85, 0.87, 0.85, 0.82,
        0.8, 0.89, 0.87, 0.84, 0.82, 0.79, 0.76, 0.73, 0.69, 0.66,
        0.62, 0.59, 0.9, 0.89, 0.87, 0.85, 0.83, 0.81, 0.91, 0.9,
        0.88, 0.87, 0.85, 0.83, 0.8, 0.78, 0.75, 0.73, 0.7, 0.67,
        0.64, 0.61, 0.57, 0.54, 0.51, 0.48, 0.45, 0.42, 0.39, 0.37,
        0.34, 0.32, 0.29, 0.27, 0.25, 0.23, 0.21, 0.2, 0.18, 0.92,
        0.93, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.85, 0.84, 0.82,
        0.8, 0.79, 0.77, 0.75, 0.73, 0.71, 0.94, 0.93, 0.92, 0.91,
        0.9, 0.89, 0.88, 0.87, 0.86, 0.85, 0.83, 0.82, 0.8, 0.78,
        0.77, 0.94, 0.93, 0.93, 0.92, 0.91, 0.9, 0.89, 0.94, 0.94,
        0.93, 0.92, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.95, 0.94,
        0.94, 0.95, 0.95, 0.94, 0.93, 0.95, 0.95, 0.94, 0.94, 0.93,
        0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.86, 0.96, 0.95, 0.95,
        0.94, 0.93, 0.93, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.85,
        0.84, 0.83, 0.81, 0.79, 0.78, 0.76, 0.74, 0.72, 0.7, 0.68,
        0.66, 0.63, 0.61, 0.59, 0.56, 0.54, 0.52, 0.49, 0.47, 0.44,
        0.42, 0.4, 0.38, 0.36, 0.34, 0.32, 0.3, 0.28, 0.26, 0.24,
        0.23, 0.21, 0.2, 0.18, 0.17, 0.16, 0.15, 0.14, 0.13, 0.12,
        0.11, 0.1, 0.1, 0.09, 0.08, 0.08, 0.07, 0.07, 0.06, 0.06,
        0.05,
    ]
    expected_legend = ['P_alive', 'purchases']

    # yes I know this is using the wrong data, but I'm testing plotting here.
    all_transactions = load_transaction_data()
    customer = 35
    days_since_birth = 200
    customer_rows = all_transactions.loc[all_transactions['id'] == customer]

    ax = plotting.plot_history_alive(bgf, days_since_birth, customer_rows, 'date')

    plotted_dates, plotted_p_alive = ax.lines[0].get_data()
    plot_legend = ax.legend_

    # y has some weird array shapes, so round element by element first.
    rounded_p_alive = [np.round(value, 5) for value in plotted_p_alive]
    assert_allclose(rounded_p_alive, expected_p_alive, atol=0.01)
    assert_array_equal(plotted_dates, expected_dates)

    legend_entries = [entry.get_text() for entry in plot_legend.get_texts()]
    assert_array_equal(legend_entries, expected_legend)

    assert_equal(ax.title.get_text(), "History of P_alive")
    assert_equal(ax.xaxis.get_label().get_text(), "")
    assert_equal(ax.yaxis.get_label().get_text(), "P_alive")
    plt.close()
def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(self):
    """Assert that daily and hourly summaries give almost the same 30-day forecast.

    One ``ModifiedBetaGeoFitter`` is fitted first on the ``freq='D'`` summary
    and then on the ``freq='h'`` summary, with ``np.random.seed(0)`` called
    before each fit. The forecast horizon is 30 for the daily fit and
    30 * 24 for the hourly fit.
    """
    days_ahead = 30
    hours_per_day = 24

    transactions = load_transaction_data(parse_dates=['date'])
    period_end = max(transactions.date)

    # Build both summaries up front, keyed by their frequency code.
    summaries = {
        freq: utils.summary_data_from_transaction_data(
            transactions, 'id', 'date', observation_period_end=period_end, freq=freq)
        for freq in ('D', 'h')
    }

    model = estimation.ModifiedBetaGeoFitter()
    forecasts = []
    for freq, horizon in (('D', days_ahead), ('h', days_ahead * hours_per_day)):
        frame = summaries[freq]
        np.random.seed(0)
        model.fit(frame['frequency'], frame['recency'], frame['T'])
        forecasts.append(model.expected_number_of_purchases_up_to_time(horizon))

    forecast_from_daily, forecast_from_hourly = forecasts
    npt.assert_almost_equal(forecast_from_daily, forecast_from_hourly)
def transaction_data():
    """Return whatever ``load_transaction_data()`` produces with default arguments."""
    loaded = load_transaction_data()
    return loaded
#plt.show()
#from lifetimes.plotting import plot_probability_alive_matrix
#plot_probability_alive_matrix(bgf)
#plt.show()

# Exploratory script fragment. `data` and `bgf` are not defined in this
# fragment and must already exist when it runs.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the former `print x` statements are a SyntaxError on Python 3.

# Expected purchases per row over t = 1, then the five highest predictions.
t = 1
data['predicted_purchases'] = data.apply(
    lambda r: bgf.conditional_expected_number_of_purchases_up_to_time(
        t, r['frequency'], r['recency'], r['T']),
    axis=1)
# DataFrame.sort was removed from pandas; sort_values is its replacement.
print(data.sort_values('predicted_purchases').tail(5))

from lifetimes.datasets import load_transaction_data
from lifetimes.utils import summary_data_from_transaction_data

# Inspect the raw transaction data and the existing `data` frame.
transaction_data = load_transaction_data()
print(transaction_data.head())
print(type(transaction_data))
print(transaction_data.columns)
print(data.columns)
print(data.head())

# Same prediction again with t = 10, overwriting the column.
t = 10
data['predicted_purchases'] = data.apply(
    lambda r: bgf.conditional_expected_number_of_purchases_up_to_time(
        t, r['frequency'], r['recency'], r['T']),
    axis=1)
print(data)

from pandas import DataFrame

# Small hand-built frame with columns id, R, F and M.
d = [{'id': 1, 'R': 23, 'F': 12, 'M': 12.5},
     {'id': 2, 'R': 43, 'F': 1, 'M': 120.5},
     {'id': 3, 'R': 203, 'F': 2, 'M': 19.5}]
test = DataFrame(d)
print(test)