Example #1
    def test_plot_probability_alive_matrix(self):
        from matplotlib import pyplot as plt

        plt.figure()
        plotting.plot_probability_alive_matrix(bgf)

        plt.figure()
        plotting.plot_probability_alive_matrix(bgf, max_recency=100, max_frequency=50)

        plt.show()
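
The fixtures behind these test snippets are not shown on this page; a minimal setup sketch (an assumption, not part of the original test module) that makes `bgf` and `plotting` available would be:

# assumed setup for the snippets on this page: fit a BG/NBD model on the bundled
# CDNOW summary data so that `bgf` and `plotting` are defined
from lifetimes import BetaGeoFitter, plotting
from lifetimes.datasets import load_cdnow_summary

cdnow_summary = load_cdnow_summary()
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(cdnow_summary['frequency'], cdnow_summary['recency'], cdnow_summary['T'])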
Example #2
    def test_plot_probability_alive_matrix(self):
        from matplotlib import pyplot as plt

        plt.figure()
        plotting.plot_probability_alive_matrix(bgf)

        plt.figure()
        plotting.plot_probability_alive_matrix(bgf,
                                               max_recency=100,
                                               max_frequency=50)

        plt.show()
    def test_plot_probability_alive_matrix_max_frequency_max_recency(
            self, bgf):
        shape = (101, 101)
        col_idx = 15
        col = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0.001, 0.001, 0.001, 0.002, 0.002, 0.003, 0.004,
            0.006, 0.008, 0.010, 0.012, 0.016, 0.020, 0.025, 0.031, 0.039,
            0.048, 0.059, 0.072, 0.088, 0.106, 0.126, 0.150, 0.178, 0.208,
            0.242, 0.278, 0.318, 0.359, 0.403, 0.447, 0.492, 0.536, 0.579,
            0.621, 0.660, 0.697, 0.731, 0.763, 0.791, 0.817, 0.839, 0.860,
            0.877, 0.893, 0.907, 0.919, 0.929, 0.939, 0.947, 0.953
        ]

        ax = plotting.plot_probability_alive_matrix(bgf,
                                                    max_frequency=100,
                                                    max_recency=100)
        ar = ax.get_images()[0].get_array()
        assert_array_equal(ar.shape, shape)
        assert_allclose(ar[:, col_idx].data, col,
                        atol=0.01)  # only test one column for brevity
        assert_equal(
            ax.title.get_text(),
            "Probability Customer is Alive,\nby Frequency and Recency of a Customer"
        )
        assert_equal(ax.xaxis.get_label().get_text(),
                     "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()
    def test_plot_probability_alive_matrix_max_recency(self, bgf):
        shape = (101, 30)
        col_idx = 25
        col = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0.001, 0.001, 0.002, 0.003, 0.004, 0.006, 0.008, 0.012, 0.017,
            0.023, 0.032, 0.043, 0.058, 0.078, 0.103, 0.134, 0.173, 0.219,
            0.273, 0.333, 0.399, 0.468, 0.537, 0.604, 0.667, 0.724, 0.774,
            0.816, 0.852, 0.882, 0.906, 0.925, 0.941, 0.953, 0.963, 0.970
        ]

        ax = plotting.plot_probability_alive_matrix(bgf, max_recency=100)
        ar = ax.get_images()[0].get_array()
        assert_array_equal(ar.shape, shape)
        assert_allclose(ar[:, col_idx].data, col,
                        atol=0.01)  # only test one column for brevity
        assert_equal(
            ax.title.get_text(),
            "Probability Customer is Alive,\nby Frequency and Recency of a Customer"
        )
        assert_equal(ax.xaxis.get_label().get_text(),
                     "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()
    def test_plot_probability_alive_matrix_max_frequency(self, bgf):
        shape = (39, 101)
        row_idx = 35
        row = [
            1.0, 0.736, 0.785, 0.814, 0.833, 0.846, 0.855, 0.862, 0.866, 0.869,
            0.871, 0.872, 0.873, 0.873, 0.872, 0.871, 0.869, 0.867, 0.865,
            0.862, 0.859, 0.856, 0.852, 0.848, 0.844, 0.839, 0.834, 0.829,
            0.823, 0.817, 0.811, 0.805, 0.798, 0.791, 0.783, 0.775, 0.767,
            0.759, 0.750, 0.741, 0.731, 0.721, 0.711, 0.701, 0.690, 0.679,
            0.667, 0.656, 0.644, 0.631, 0.619, 0.606, 0.593, 0.580, 0.566,
            0.552, 0.539, 0.525, 0.511, 0.496, 0.482, 0.468, 0.454, 0.439,
            0.425, 0.411, 0.397, 0.383, 0.369, 0.355, 0.342, 0.329, 0.316,
            0.303, 0.290, 0.278, 0.266, 0.254, 0.243, 0.232, 0.221, 0.211,
            0.201, 0.191, 0.182, 0.173, 0.164, 0.156, 0.148, 0.140, 0.133,
            0.126, 0.119, 0.113, 0.106, 0.101, 0.095, 0.090, 0.085, 0.080,
            0.075
        ]

        ax = plotting.plot_probability_alive_matrix(bgf, max_frequency=100)
        ar = ax.get_images()[0].get_array()
        assert_array_equal(ar.shape, shape)
        assert_allclose(ar[row_idx, :].data, row,
                        atol=0.01)  # only test one row for brevity
        assert_equal(
            ax.title.get_text(),
            "Probability Customer is Alive,\nby Frequency and Recency of a Customer"
        )
        assert_equal(ax.xaxis.get_label().get_text(),
                     "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()

    def viz_bgf(self, t):
        # visualize the customer frequency/recency matrix
        plot_frequency_recency_matrix(self.bgf, T=t, cmap='coolwarm')
        plt.savefig('sales_frequency_recency_matrix.png')
        plt.close()
        # visualize the probability that each customer is still alive
        plot_probability_alive_matrix(self.bgf, cmap='coolwarm')
        plt.savefig('probability_alive_matrix.png')
        plt.close()
        # visualize expected repeat purchases
        plot_expected_repeat_purchases(self.bgf)
        plt.savefig('ProbabilityExpectedRepeatPurchases.png')
        plt.close()
        # visualize the expected number of transactions per period
        plot_period_transactions(self.bgf)
        plt.savefig('period_transactions.png')
        plt.close()
Example #7
    def test_plot_probability_alive_matrix(self, bgf):
        shape = (39, 30)
        row_idx = 35
        row = [1.0, 0.736, 0.785, 0.814, 0.833, 0.846, 0.855, 0.862, 0.866, 0.869, 0.871,
               0.872, 0.873, 0.873, 0.872, 0.871, 0.869, 0.867, 0.865, 0.862, 0.859, 0.856,
               0.852, 0.848, 0.844, 0.839, 0.834, 0.829, 0.823, 0.817]

        ax = plotting.plot_probability_alive_matrix(bgf)
        ar = ax.get_images()[0].get_array()
        assert_array_equal(ar.shape, shape)
        assert_allclose(ar[row_idx, :].data, row, atol=0.01)  # only test one row for brevity
        assert_equal(ax.title.get_text(), "Probability Customer is Alive,\nby Frequency and Recency of a Customer")
        assert_equal(ax.xaxis.get_label().get_text(), "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()
Example #8
    def test_plot_probability_alive_matrix_max_frequency(self):
        plt.figure()
        plotting.plot_probability_alive_matrix(bgf, max_frequency=100)
        return plt.gcf()
Example #9
    def test_plot_probability_alive_matrix(self):
        plt.figure()
        plotting.plot_probability_alive_matrix(bgf)
        return plt.gcf()
Example #10
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    import matplotlib.pyplot
    matplotlib.pyplot.ioff()
    ##
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes.plotting import plot_history_alive
    from lifetimes.plotting import plot_cumulative_transactions
    from lifetimes.utils import expected_cumulative_transactions
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd
    import datalab.storage as gcs
    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    print(src_client_id, context)
    query = context['templates_dict']['query']
    data = pd.read_sql(query, con=conn)
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')
    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    # Matrix charts
    plot_period_transactions_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_period_transactions_chart.svg'
    plot_frequency_recency_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_frequency_recency_matrix.svg'
    plot_probability_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_probability_alive_matrix.svg'
    plot_calibration_vs_holdout_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_calibration_vs_holdout_purchases.svg'

    ax0 = plot_period_transactions(bgf, max_frequency=30)
    ax0.figure.savefig(plot_period_transactions_chart, format='svg')
    ax1 = plot_frequency_recency_matrix(bgf)
    ax1.figure.savefig(plot_frequency_recency_chart, format='svg')
    ax2 = plot_probability_alive_matrix(bgf)
    ax2.figure.savefig(plot_probability_chart, format='svg')
    ax3 = plot_calibration_purchases_vs_holdout_purchases(bgf,
                                                          training_rfm,
                                                          n=50)
    ax3.figure.savefig(plot_calibration_vs_holdout_chart, format='svg')
    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  #the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)
    full_rfm_with_value = full_rfm.join(clv)

    full_rfm_file = context.get("ds_nodash") + "-src_client_id-" + str(
        src_client_id) + '-icabbi-test.csv'
    full_rfm_with_value.to_csv(full_rfm_file)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            full_rfm_file,
            filename=full_rfm_file)
    # upload the rendered charts alongside the RFM CSV
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_period_transactions_chart,
            filename=plot_period_transactions_chart)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_frequency_recency_chart,
            filename=plot_frequency_recency_chart)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_probability_chart,
            filename=plot_probability_chart)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_calibration_vs_holdout_chart,
            filename=plot_calibration_vs_holdout_chart)
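
The inline comment on `discount_rate` above is easy to verify; a standalone sketch (not part of the DAG task) confirming that a 1% monthly rate compounds to roughly 12.7% per year:

# sanity check for the discount_rate comment: 1% per month compounds to ~12.7% per year
monthly_rate = 0.01
annual_rate = (1 + monthly_rate) ** 12 - 1
print(round(annual_rate, 4))  # 0.1268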
Example #11
    def test_plot_probability_alive_matrix_max_frequency_max_recency(self):
        plt.figure()
        plotting.plot_probability_alive_matrix(bgf, max_frequency=100, max_recency=100)
        return plt.gcf()
Example #12
    def test_plot_probability_alive_matrix(self):
        plt.figure()
        plotting.plot_probability_alive_matrix(bgf)
        return plt.gcf()
from lifetimes.plotting import plot_probability_alive_matrix
from matplotlib import pylab


def visualizeProbabilityAliveMatrix(betaGeoFitterModel):
    # plot the probability-alive heatmap for a fitted BG/NBD model and save it to disk
    plot_probability_alive_matrix(betaGeoFitterModel)
    pylab.savefig("ProbabilityAliveMatrixPlot.png")
display()

# COMMAND ----------

# MAGIC %md From this chart, we can see this customer made his or her first purchase in January 2011, followed by a repeat purchase later that month. There was about a 1-month lull in activity during which the probability of the customer being alive declined slightly, but with purchases in March, April, and June of that year, the customer sent repeated signals that he or she was engaged. Since that last June purchase, the customer hasn't been seen in our transaction history, and our belief that the customer remains engaged has been dropping, though at a moderate pace given the signals previously sent.
# MAGIC
# MAGIC %md How does the model arrive at these probabilities? The exact math is tricky, but by plotting the probability of being alive as a heatmap relative to frequency and recency, we can understand the probabilities assigned to each combination of these two values:

# COMMAND ----------

from lifetimes.plotting import plot_probability_alive_matrix

# set figure size
plt.subplots(figsize=(12, 8))

plot_probability_alive_matrix(model)

display()

# COMMAND ----------

# MAGIC %md In addition to predicting the probability that a customer is still alive, we can calculate the number of purchases expected from a customer over a given future time interval, such as the next 30 days:

# COMMAND ----------

from lifetimes.plotting import plot_frequency_recency_matrix

# set figure size
plt.subplots(figsize=(12, 8))

plot_frequency_recency_matrix(model, T=30)
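
# COMMAND ----------

# MAGIC %md As a quick aside (a minimal sketch rather than part of the original notebook), the same 30-day expectation can be computed per customer directly; `summary` here is an assumed name for the frequency/recency/T frame used to fit `model`:

# COMMAND ----------

# assumed summary frame with the standard frequency/recency/T columns used to fit `model`
summary['purchases_next_30d'] = model.conditional_expected_number_of_purchases_up_to_time(
    30, summary['frequency'], summary['recency'], summary['T'])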
Example #15
                          on='customer_id')

df_final['wholesaler'] = np.where(df_final['predicted_cltv'] < 1000, 0, 1)
df_final['churn_group'] = np.where(df_final['probability_alive'] < .5, 0, 1)

df_final

# Plots and Validation

plot_period_transactions(bgf_mod)

cal_hold = calibration_and_holdout_data(
    df,
    'customer_id',
    'date',
    calibration_period_end='2018-12-31',  # 3-year calibration period
    observation_period_end='2020-12-31',  # 2-year holdout period
    freq=frq)

# plot the efficacy of the model using the hold-out period
plt.rcParams['figure.figsize'] = (20, 10)
bgf = BetaGeoFitter()
bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, cal_hold)

fig = plt.figure(figsize=(8, 6))
plot_frequency_recency_matrix(bgf_mod)

fig = plt.figure(figsize=(8, 6))
plot_probability_alive_matrix(bgf_mod)
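
The `predicted_cltv` and `probability_alive` columns joined above are produced upstream of this snippet; a hedged sketch of how such columns are typically derived with lifetimes (the `rfm` and `ggf_mod` names are assumptions, not the original variables):

# hedged sketch, not the original upstream code: derive the columns used for the
# wholesaler/churn_group flags from a fitted BG/NBD model (bgf_mod) and a fitted
# Gamma-Gamma model (ggf_mod) over an RFM summary frame `rfm`
rfm['probability_alive'] = bgf_mod.conditional_probability_alive(
    rfm['frequency'], rfm['recency'], rfm['T'])
rfm['predicted_cltv'] = ggf_mod.customer_lifetime_value(
    bgf_mod, rfm['frequency'], rfm['recency'], rfm['T'], rfm['monetary_value'],
    time=12, discount_rate=0.01)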
Example #16
# Data check
# ==========================================================================
# Order distribution by frequency
df["FREQUENCY"].plot(kind="hist", bins=50)

# ==========================================================================
# BG/NBD model
# ==========================================================================

bgf = BetaGeoFitter(penalizer_coef=0.01)
bgf.fit(df["FREQUENCY"], df["RECENCY"], df["T"])

bgf.summary

plotting.plot_frequency_recency_matrix(bgf)
plotting.plot_probability_alive_matrix(bgf)

# Repeat transaction model check
plotting.plot_period_transactions(bgf)

# ==========================================================================
# Ranking reps from best to worst
# ==========================================================================

t = 1
df["predicted_purchases"] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, df["FREQUENCY"], df["RECENCY"], df["T"])
df.sort_values(by="predicted_purchases").tail(10)

# ==========================================================================
# Gamma Gamma Model
# similar API to scikit-learn and lifelines.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(X_train['txn_total'], X_train['recency_true']/7,
        X_train['T']/7)
print(bgf)

%matplotlib inline
from lifetimes.plotting import plot_frequency_recency_matrix

plot_frequency_recency_matrix(bgf)

#%%
from lifetimes.plotting import plot_probability_alive_matrix

f=plot_probability_alive_matrix(bgf)

t=52
X_train['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, X_train['txn_total'], X_train['recency_true']/7,
    X_train['T']/7)
#%%
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)

#%%
X_train.sort_values('predicted_purchases')
#%%
# X_train.sort_values(by='predicted_purchases').head(5)
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)
Example #18
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(summary['frequency'], summary['recency'], summary['T'])
print(bgf)

bgf.summary

from lifetimes.plotting import plot_frequency_recency_matrix
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = [10, 10]

plot_frequency_recency_matrix(bgf, title="")

from lifetimes.plotting import plot_probability_alive_matrix

plot_probability_alive_matrix(bgf, title="")

t = 12
summary['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, summary['frequency'], summary['recency'], summary['T'])
summary.sort_values(by='predicted_purchases').tail(10)

from lifetimes.plotting import plot_period_transactions
plt.rcParams['figure.figsize'] = [12, 3]

plot_period_transactions(bgf)

from lifetimes.utils import calibration_and_holdout_data

summary_cal_holdout = calibration_and_holdout_data(