def test_plot_period_transactions_parento(self):
    """Plot period transactions for a ``ParetoNBDFitter`` fitted on ``cd_data``.

    Returns the current matplotlib figure after plotting.
    """
    model = ParetoNBDFitter()
    frequency, recency, age = (
        cd_data[column] for column in ('frequency', 'recency', 'T')
    )
    model.fit(frequency, recency, age, iterative_fitting=1)

    # Open a dedicated figure before plotting, then hand back the current one.
    plt.figure()
    plotting.plot_period_transactions(model)
    return plt.gcf()
def test_plot_period_transactions_mbgf(self):
    """Plot period transactions for a ``ModifiedBetaGeoFitter`` fitted on ``cd_data``.

    Returns the current matplotlib figure after plotting.
    """
    fitter = ModifiedBetaGeoFitter()
    fit_columns = [cd_data[name] for name in ('frequency', 'recency', 'T')]
    fitter.fit(*fit_columns, iterative_fitting=1)

    plt.figure()
    plotting.plot_period_transactions(fitter)
    current_figure = plt.gcf()
    return current_figure
def evaluation_plots(plot_type):
    """
    Draw one family of evaluation plots for the saved calibration model.

    The calibration model and both CSV datasets are loaded on every call,
    whichever plot is requested. A ``plot_type`` that matches none of the
    values below draws nothing.

    Parameters
    ----------
    plot_type: str.
        "tracking" - Tracking Cumulative and Tracking Daily Transactions,
        drawn side by side on one figure.
        "repeated" - Frequency of Repeated Transactions.
        "calibration_holdout" - Calibration vs Holdout Purchases.

    Returns
    -------
    None
    """
    # Calibration model restored from disk.
    calibration_model = BetaGeoFitter(penalizer_coef=0.0)
    calibration_model.load_model(path="models/calibration_model.pkl")

    # Calibration/holdout summary, then the raw transactions.
    holdout_summary = pd.read_csv("datasets/summary_cal_holdout.csv")
    transaction_log = pd.read_csv("datasets/transactions.csv")

    if plot_type == "tracking":
        figure = plt.figure(figsize=(20, 4))
        # Both tracking plots take the same arguments apart from the axes.
        tracking_options = dict(
            model=calibration_model,
            transactions=transaction_log,
            datetime_col="order_purchase_timestamp",
            customer_id_col="customer_unique_id",
            t=604,
            t_cal=512,
            freq="D",
        )
        plot_cumulative_transactions(
            ax=figure.add_subplot(121), **tracking_options)
        plot_incremental_transactions(
            ax=figure.add_subplot(122), **tracking_options)
        return

    if plot_type == "repeated":
        plot_period_transactions(model=calibration_model)
        return

    if plot_type == "calibration_holdout":
        plot_calibration_purchases_vs_holdout_purchases(
            model=calibration_model,
            calibration_holdout_matrix=holdout_summary)

    return
def viz_bgf(self, t):
    """Render four diagnostic plots for ``self.bgf`` and save each as a PNG.

    Each plot is drawn, written to a fixed file name in the working
    directory, and then closed before the next one is drawn.

    Parameters
    ----------
    t:
        Passed as ``T`` to ``plot_frequency_recency_matrix``.
    """
    # (plot function, extra keyword arguments, output file) in drawing order:
    # frequency/recency matrix, probability-alive matrix, expected repeat
    # purchases, expected number of period transactions.
    plot_steps = (
        (plot_frequency_recency_matrix,
         {'T': t, 'cmap': 'coolwarm'},
         'sales_frequency_recency_matrix.png'),
        (plot_probability_alive_matrix,
         {'cmap': 'coolwarm'},
         'probability_alive_matrix.png'),
        (plot_expected_repeat_purchases,
         {},
         'ProbabilityExpectedRepeatPurchases.png'),
        (plot_period_transactions,
         {},
         'period_transactions.png'),
    )

    for draw, options, output_file in plot_steps:
        draw(self.bgf, **options)
        plt.savefig(output_file)
        plt.close()
def test_plot_period_transactions_labels(self, bgf):
    """Check bar heights, title, axis labels and custom legend labels.

    The plot is requested with ``label=['A', 'B']`` and the legend is
    expected to show exactly those two entries.
    """
    expected = [
        1411, 439, 214, 100, 62, 38, 29,
        1411, 439, 214, 100, 62, 38, 29,
    ]

    ax = plotting.plot_period_transactions(bgf, label=['A', 'B'])

    bar_heights = [bar.get_height() for bar in ax.patches]
    assert_allclose(bar_heights, expected, rtol=0.3)

    text_checks = (
        (ax.title, "Frequency of Repeat Transactions"),
        (ax.xaxis.get_label(), "Number of Calibration Period Transactions"),
        (ax.yaxis.get_label(), "Customers"),
    )
    for text_artist, wanted in text_checks:
        assert_equal(text_artist.get_text(), wanted)

    legend_entries = [entry.get_text() for entry in ax.legend_.get_texts()]
    assert_array_equal(legend_entries, ["A", "B"])
    plt.close()
def test_plot_period_transactions_max_frequency(self, bgf):
    """Check the plot produced with ``max_frequency=12``.

    Verifies the 24 bar heights (absolute tolerance), the title, both axis
    labels and the default legend entries.
    """
    expected = [
        1411, 439, 214, 100, 62, 38, 29, 23, 7, 5, 5, 5,
        1429, 470, 155, 89, 71, 39, 26, 20, 18, 9, 6, 7,
    ]

    ax = plotting.plot_period_transactions(bgf, max_frequency=12)

    # Absolute tolerance: small counts can differ a lot in relative terms.
    bar_heights = [bar.get_height() for bar in ax.patches]
    assert_allclose(bar_heights, expected, atol=50)

    title_text = ax.title.get_text()
    assert_equal(title_text, "Frequency of Repeat Transactions")
    x_label_text = ax.xaxis.get_label().get_text()
    assert_equal(x_label_text, "Number of Calibration Period Transactions")
    y_label_text = ax.yaxis.get_label().get_text()
    assert_equal(y_label_text, "Customers")

    legend_entries = [entry.get_text() for entry in ax.legend_.get_texts()]
    assert_array_equal(legend_entries, ["Actual", "Model"])
    plt.close()
def test_plot_period_transactions_mbgf(self, cd_data):
    """Check title, axis labels and legend for a ``ModifiedBetaGeoFitter`` plot.

    The model is fitted on the ``frequency``, ``recency`` and ``T`` columns
    of ``cd_data`` before plotting. Bar heights are not checked here.
    """
    fitter = ModifiedBetaGeoFitter()
    fit_columns = [cd_data[name] for name in ('frequency', 'recency', 'T')]
    fitter.fit(*fit_columns, iterative_fitting=1)

    ax = plotting.plot_period_transactions(fitter)

    text_checks = (
        (ax.title, "Frequency of Repeat Transactions"),
        (ax.xaxis.get_label(), "Number of Calibration Period Transactions"),
        (ax.yaxis.get_label(), "Customers"),
    )
    for text_artist, wanted in text_checks:
        assert_equal(text_artist.get_text(), wanted)

    legend_entries = [entry.get_text() for entry in ax.legend_.get_texts()]
    assert_array_equal(legend_entries, ["Actual", "Model"])
    plt.close()
def test_plot_period_transactions(self):
    """Smoke-test ``plot_period_transactions`` with three argument variants.

    Plots with defaults, with ``max_frequency=12`` and with custom labels,
    then shows the result.
    """
    from matplotlib import pyplot as plt

    keyword_variants = (
        {},
        {'max_frequency': 12},
        {'label': ['A', 'B']},
    )
    for options in keyword_variants:
        plotting.plot_period_transactions(bgf, **options)

    plt.show()
def test_plot_period_transactions(self):
    """Smoke-test ``plot_period_transactions`` on ``BG`` with three variants.

    A new figure is opened before each call: defaults, ``bins=range(5)``
    and custom labels. All figures are shown at the end.
    """
    from matplotlib import pyplot as plt

    keyword_variants = (
        {},
        {'bins': range(5)},
        {'label': ['A', 'B']},
    )
    for options in keyword_variants:
        plt.figure()
        plotting.plot_period_transactions(BG, **options)

    plt.show()
def test_plot_period_transactions_mbgf(self, cd_data):
    """Check the period-transactions plot of a fitted ``ModifiedBetaGeoFitter``.

    Verifies the 14 bar heights (30% relative tolerance), the title, both
    axis labels and the default legend entries.
    """
    expected = [
        1411, 439, 214, 100, 62, 38, 29,
        1427, 410, 211, 118, 56, 47, 29,
    ]

    fitter = ModifiedBetaGeoFitter()
    frequency, recency, age = (
        cd_data[column] for column in ('frequency', 'recency', 'T')
    )
    fitter.fit(frequency, recency, age, iterative_fitting=1)

    ax = plotting.plot_period_transactions(fitter)

    bar_heights = [bar.get_height() for bar in ax.patches]
    assert_allclose(bar_heights, expected, rtol=0.3)

    title_text = ax.title.get_text()
    assert_equal(title_text, "Frequency of Repeat Transactions")
    x_label_text = ax.xaxis.get_label().get_text()
    assert_equal(x_label_text, "Number of Calibration Period Transactions")
    y_label_text = ax.yaxis.get_label().get_text()
    assert_equal(y_label_text, "Customers")

    legend_entries = [entry.get_text() for entry in ax.legend_.get_texts()]
    assert_array_equal(legend_entries, ["Actual", "Model"])
    plt.close()
def test_plot_period_transactions_pareto(self, cd_data):
    """Check the period-transactions plot of a fitted ``ParetoNBDFitter``.

    Verifies the 14 bar heights (30% relative tolerance), the title, both
    axis labels and the default legend entries.
    """
    expected = [
        1411, 439, 214, 100, 62, 38, 29,
        1199, 330, 160, 100, 64, 47, 34,
    ]

    fitter = ParetoNBDFitter()
    fit_columns = [cd_data[name] for name in ('frequency', 'recency', 'T')]
    fitter.fit(*fit_columns, iterative_fitting=1)

    ax = plotting.plot_period_transactions(fitter)

    bar_heights = [bar.get_height() for bar in ax.patches]
    assert_allclose(bar_heights, expected, rtol=0.3)

    text_checks = (
        (ax.title, "Frequency of Repeat Transactions"),
        (ax.xaxis.get_label(), "Number of Calibration Period Transactions"),
        (ax.yaxis.get_label(), "Customers"),
    )
    for text_artist, wanted in text_checks:
        assert_equal(text_artist.get_text(), wanted)

    legend_entries = [entry.get_text() for entry in ax.legend_.get_texts()]
    assert_array_equal(legend_entries, ["Actual", "Model"])
    plt.close()
# Notebook-style cells ("#%%" separators) that plot diagnostics for the
# already-fitted model `bgf` and score the customers in `X_train`.
# Both `bgf` and `X_train` are defined outside this fragment.
from lifetimes.plotting import plot_frequency_recency_matrix
plot_frequency_recency_matrix(bgf)
#%%
from lifetimes.plotting import plot_probability_alive_matrix
f=plot_probability_alive_matrix(bgf)
# Prediction horizon passed to the model below.
# NOTE(review): recency and T are divided by 7, presumably to convert days to
# weeks, which would make t=52 roughly one year — confirm the unit bgf was
# fitted with.
t=52
X_train['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, X_train['txn_total'], X_train['recency_true']/7, X_train['T']/7)
#%%
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)
#%%
# Bare expression: the sorted frame is not assigned, so it is only visible
# when the cell is run interactively.
X_train.sort_values('predicted_purchases')
#%%
# X_train.sort_values(by='predicted_purchases').head(5)
# Repeats the import and plot from the earlier cell.
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)
#%%
import matplotlib.pyplot as plt
# Create the figure first so the matrix plot can be written to disk through it.
f = plt.figure()
plot_frequency_recency_matrix(bgf)
f.savefig("foo.pdf", bbox_inches='tight')
#%%
df["FREQUENCY"].plot(kind="hist", bins=50) # ========================================================================== # BG/NBD model # ========================================================================== bgf = BetaGeoFitter(penalizer_coef=0.01) bgf.fit(df["FREQUENCY"], df["RECENCY"], df["T"]) bgf.summary plotting.plot_frequency_recency_matrix(bgf) plotting.plot_probability_alive_matrix(bgf) # Repeat transaction model check plotting.plot_period_transactions(bgf) # ========================================================================== # Ranking reps from best to worst # ========================================================================== t = 1 df["predicted_purchases"] = bgf.conditional_expected_number_of_purchases_up_to_time( t, df["FREQUENCY"], df["RECENCY"], df["T"]) df.sort_values(by="predicted_purchases").tail(10) # ========================================================================== # Gamma Gamme Model # Model assumes that there is no relationship between the monetary value and the purchase frequency # ==========================================================================
def test_plot_period_transactions_labels(self):
    """Plot period transactions with custom labels and return the current figure."""
    plt.figure()
    custom_labels = ['A', 'B']
    plotting.plot_period_transactions(bgf, label=custom_labels)
    current_figure = plt.gcf()
    return current_figure
def test_plot_period_transactions_max_frequency(self):
    """Plot period transactions with ``max_frequency=12`` and return the current figure."""
    plt.figure()
    plot_options = {'max_frequency': 12}
    plotting.plot_period_transactions(bgf, **plot_options)
    current_figure = plt.gcf()
    return current_figure
def test_plot_period_transactions(self):
    """Plot period transactions with default arguments and return the current figure."""
    # Open a new figure before plotting; the current figure is returned.
    plt.figure()
    plotting.plot_period_transactions(bgf)
    current_figure = plt.gcf()
    return current_figure
# Script fragment: builds the final per-customer table and validates the
# fitted models. `rc`, `df`, `rfm_cluster`, `bgf_mod` and `frq` are defined
# outside this fragment.
print(f"Actual Average Sales: {rc['monetary_value'].mean()}")
# Keep one row per e-mail address.
df2 = df.drop_duplicates(subset=['email'])
# Bare expression: only shown when run interactively.
df2.shape
#df.drop_duplicates(subset=['brand'])
# Attach the e-mail and the cluster id to each customer row; both merges use
# the default join on 'customer_id'.
df_final = rc.merge(df2[['customer_id', 'email']], on='customer_id')
df_final = df_final.merge(rfm_cluster[['customer_id', 'clusterID']], on='customer_id')
# Flags: 1 when predicted_cltv >= 1000, and 1 when probability_alive >= 0.5
# (0 otherwise; NaN values also yield 1 because the `<` test is False).
df_final['wholesaler'] = np.where(df_final['predicted_cltv'] < 1000, 0, 1)
df_final['churn_group'] = np.where(df_final['probability_alive'] < .5, 0, 1)
# Bare expression: only shown when run interactively.
df_final
# Plots and Validation
plot_period_transactions(bgf_mod)
cal_hold = calibration_and_holdout_data(
    df, 'customer_id', 'date',
    calibration_period_end='2018-12-31', #3 years calibration
    observation_period_end='2020-12-31', #2 year holdout
    freq=frq)
# Plot the efficacy of the model using the hold-out period.
plt.rcParams['figure.figsize'] = (20, 10)
# A fresh model fitted on the calibration columns only; note that this
# rebinds the top-level name `bgf`.
bgf = BetaGeoFitter()
bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, cal_hold)
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    """Fit purchase and spend models for one client and upload the results.

    Steps, in order:

    1. Run ``context['templates_dict']['query']`` against Oracle and load the
       result into a DataFrame (column names are lower-cased).
    2. Build a calibration/holdout summary and fit a ``BetaGeoFitter`` on the
       calibration columns.
    3. Save four diagnostic charts as SVG files in the working directory.
    4. Fit a ``GammaGammaFitter`` on customers with ``frequency > 0`` and
       compute the customer lifetime value for every customer.
    5. Write the RFM table joined with the CLV to a CSV file.
    6. Upload the CSV and the four SVG charts to ``storage_bucket`` under
       ``<src_client_id>/<ds_nodash>/<file name>``.

    Parameters
    ----------
    oracle_conn_id:
        Connection id handed to ``OracleHook``.
    src_client_id:
        Client identifier; used in the output file names and as the first
        path component of every uploaded object.
    storage_bucket:
        Name of the destination bucket.
    ds:
        Accepted for call compatibility; not used by the function body.
    **context:
        Must contain ``templates_dict`` (with a ``query`` entry) and
        ``ds_nodash`` (a string used in file and object names).

    Returns
    -------
    None
    """
    import matplotlib.pyplot
    matplotlib.pyplot.ioff()

    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd

    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    try:
        print(src_client_id, context)
        query = context['templates_dict']['query']
        data = pd.read_sql(query, con=conn)
    finally:
        # The query result is fully loaded, so the connection is no longer
        # needed; close it even if the query fails.
        conn.close()
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'],
            training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    ds_nodash = context.get("ds_nodash")
    client_tag = str(src_client_id)

    # Chart file names.
    plot_period_transactions_chart = (
        ds_nodash + client_tag + '_plot_period_transactions_chart.svg')
    plot_frequency_recency_chart = (
        ds_nodash + client_tag + '_plot_frequency_recency_matrix.svg')
    plot_probability_chart = (
        ds_nodash + client_tag + '_plot_probability_alive_matrix.svg')
    plot_calibration_vs_holdout_chart = (
        ds_nodash + client_tag + '_plot_calibration_vs_holdout_purchases.svg')

    # (output file, callable that draws the chart and returns its axes)
    charts = [
        (plot_period_transactions_chart,
         lambda: plot_period_transactions(bgf, max_frequency=30)),
        (plot_frequency_recency_chart,
         lambda: plot_frequency_recency_matrix(bgf)),
        (plot_probability_chart,
         lambda: plot_probability_alive_matrix(bgf)),
        (plot_calibration_vs_holdout_chart,
         lambda: plot_calibration_purchases_vs_holdout_purchases(
             bgf, training_rfm, n=50)),
    ]
    for chart_file, draw_chart in charts:
        ax = draw_chart()
        ax.figure.savefig(chart_file, format='svg')
        # Close the figure once saved: it releases the memory and removes the
        # figure from pyplot's state, so the next chart cannot draw onto it.
        matplotlib.pyplot.close(ax.figure)

    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')

    # The spend model is fitted on customers with at least one repeat
    # purchase only.
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)

    full_rfm_with_value = full_rfm.join(clv)
    full_rfm_file = (
        ds_nodash + "-src_client_id-" + client_tag + '-icabbi-test.csv')
    full_rfm_with_value.to_csv(full_rfm_file)

    # Upload every generated file under its own name. Previously each chart
    # object was uploaded with filename=full_rfm_file, so the bucket received
    # five copies of the CSV and none of the SVG charts.
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default')
    object_prefix = client_tag + "/" + ds_nodash + "/"
    local_files = [full_rfm_file] + [chart_file for chart_file, _ in charts]
    for local_file in local_files:
        gcs_hook.upload(
            bucket=storage_bucket,
            object=object_prefix + local_file,
            filename=local_file)
def visualizePlotPeriodTransaction(betaGeoFitterModel):
    """Plot the period transactions of the given model and save the plot.

    The plot is written to ``PeriodTransactionPlot.png`` in the working
    directory.
    """
    output_file = "PeriodTransactionPlot.png"
    plot_period_transactions(betaGeoFitterModel)
    pylab.savefig(output_file)