def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self, transaction_data, bgf):
        """Draw the calibration-vs-holdout plot by time since last purchase.

        Builds the calibration/holdout summary from ``transaction_data``
        (columns 'id' and 'date', dates '2014-09-01' and '2014-12-31'),
        fits ``bgf`` on the calibration columns, and plots onto a fresh
        figure.

        Returns the current matplotlib figure (presumably consumed by an
        image-comparison harness -- confirm against the test setup).
        """
        cal_holdout = utils.calibration_and_holdout_data(
            transaction_data, 'id', 'date', '2014-09-01', '2014-12-31'
        )
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        # Start from a new figure so the plot does not land on a stale one.
        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout, kind='time_since_last_purchase'
        )
        return plt.gcf()
Exemple #2
0
    def test_plot_calibration_purchases_vs_holdout_purchases(self):
        """Draw the default calibration-vs-holdout purchases plot.

        Loads the transaction data, builds the calibration/holdout summary,
        fits the ``bgf`` fitter from the enclosing scope on the calibration
        columns, and plots onto a fresh figure.

        Returns the current matplotlib figure.
        """
        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31'
        )
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        # Start from a new figure so the plot does not land on a stale one.
        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(bgf, cal_holdout)
        return plt.gcf()
Exemple #3
0
    def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self):
        """Draw the calibration-vs-holdout plot by time since last purchase.

        Loads the transaction data, builds the calibration/holdout summary,
        fits the ``bgf`` fitter from the enclosing scope on the calibration
        columns, and plots with ``kind='time_since_last_purchase'``.

        Returns the current matplotlib figure.
        """
        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31'
        )
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        # Start from a new figure so the plot does not land on a stale one.
        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout, kind='time_since_last_purchase'
        )
        return plt.gcf()
Exemple #4
0
    def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self):
        """Draw the calibration-vs-holdout plot by time since last purchase.

        Loads the transaction data, builds the calibration/holdout summary,
        fits the ``bgf`` fitter from the enclosing scope on the calibration
        columns, and plots with ``kind="time_since_last_purchase"``.

        Returns the current matplotlib figure.
        """
        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), "id", "date", "2014-09-01", "2014-12-31"
        )
        bgf.fit(
            cal_holdout["frequency_cal"],
            cal_holdout["recency_cal"],
            cal_holdout["T_cal"],
        )

        # Start from a new figure so the plot does not land on a stale one.
        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout, kind="time_since_last_purchase"
        )
        return plt.gcf()
    def test_plot_calibration_purchases_vs_holdout_purchases(self):
        """Show the default calibration-vs-holdout purchases plot.

        Loads the transaction data, builds the calibration/holdout summary,
        fits the ``bgf`` fitter from the enclosing scope on the calibration
        columns, draws the plot on a fresh figure and displays it with
        ``plt.show()``. Nothing is returned or asserted.
        """
        # Imported locally, as in the original test.
        from matplotlib import pyplot as plt

        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31'
        )
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(bgf, cal_holdout)
        plt.show()
Exemple #6
0
    def test_plot_calibration_purchases_vs_holdout_purchases(self):
        """Show the default calibration-vs-holdout purchases plot.

        Loads the transaction data, builds the calibration/holdout summary,
        fits the ``bgf`` fitter from the enclosing scope on the calibration
        columns, draws the plot on a fresh figure and displays it with
        ``plt.show()``. Nothing is returned or asserted.
        """
        # Imported locally, as in the original test.
        from matplotlib import pyplot as plt

        cal_holdout = utils.calibration_and_holdout_data(
            load_transaction_data(), 'id', 'date', '2014-09-01', '2014-12-31'
        )
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        plt.figure()
        plotting.plot_calibration_purchases_vs_holdout_purchases(bgf, cal_holdout)
        plt.show()
def evaluation_plots(plot_type):
    """
    Draw one family of evaluation plots for the saved calibration model.

    Evaluation Plots:
    - Tracking Cumulative Transactions
    - Tracking Daily Transactions
    - Frequency of Repeated Transactions
    - Calibration vs Holdout.

    The calibration model is always loaded from
    "models/calibration_model.pkl". Each CSV dataset is read only by the
    branch that actually plots it, so e.g. "repeated" no longer pays for (or
    fails on) datasets it never uses.

    Parameters
    ----------
        plot_type: str.
            "tracking" - Tracking Cumulative and Tracking Daily Transactions.
            "repeated" - Frequency of Repeated Transactions.
            "calibration_holdout" - Calibration vs Holdout Purchases.
            Any other value draws nothing.

    Returns
    -------
        None. The plots are drawn on matplotlib figures as a side effect.
    """
    # Every branch needs the calibration model.
    cal_bg_nbd = BetaGeoFitter(penalizer_coef=0.0)
    cal_bg_nbd.load_model(path="models/calibration_model.pkl")

    if plot_type == "tracking":
        # Only the tracking plots need the raw transactions.
        transactions = pd.read_csv("datasets/transactions.csv")

        # Both tracking plots share every argument except the target axes.
        tracking_kwargs = dict(model=cal_bg_nbd,
                               transactions=transactions,
                               datetime_col="order_purchase_timestamp",
                               customer_id_col="customer_unique_id",
                               t=604,
                               t_cal=512,
                               freq="D")

        # One wide figure with the two plots side by side.
        fig = plt.figure(figsize=(20, 4))
        plot_cumulative_transactions(ax=fig.add_subplot(121),
                                     **tracking_kwargs)
        plot_incremental_transactions(ax=fig.add_subplot(122),
                                      **tracking_kwargs)

    elif plot_type == "repeated":
        plot_period_transactions(model=cal_bg_nbd)

    elif plot_type == "calibration_holdout":
        # Only this plot needs the calibration/holdout summary.
        summary_cal_holdout = pd.read_csv("datasets/summary_cal_holdout.csv")
        plot_calibration_purchases_vs_holdout_purchases(
            model=cal_bg_nbd, calibration_holdout_matrix=summary_cal_holdout)
    def calibrate_bgf(self, calib_end_date, period_end_date, viz=False):
        '''
        Fit the BGF model on a calibration period and save a goodness-of-fit plot.

        Splits ``self.transaction_data`` (columns 'CustomerNo' and
        'OrderDate') into calibration and holdout periods, refits
        ``self.bgf`` on the calibration columns, and writes the
        calibration-vs-holdout purchases plot to
        'calibration_purchases_vs_holdout_purchases.png' in the current
        working directory. The plot is saved regardless of ``viz``.

        Parameters
        ----------
        calib_end_date:
            End of the calibration period (the original note suggests using
            roughly 75% of the data for training -- caller's choice).
        period_end_date:
            End of the observation (holdout) period.
        viz: bool
            When truthy, print the head of the calibration/holdout summary.

        Returns
        -------
        None
        '''
        summary_cal_holdout = calibration_and_holdout_data(
            self.transaction_data, 'CustomerNo', 'OrderDate',
            calibration_period_end=calib_end_date,
            observation_period_end=period_end_date)

        # Plain truthiness instead of `== True`, so any truthy flag works.
        if viz:
            print(summary_cal_holdout.head())

        # NOTE: this refits self.bgf on calibration data only.
        self.bgf.fit(summary_cal_holdout['frequency_cal'],
                     summary_cal_holdout['recency_cal'],
                     summary_cal_holdout['T_cal'])
        plot_calibration_purchases_vs_holdout_purchases(
            self.bgf, summary_cal_holdout, colormap='coolwarm', alpha=0.75)
        plt.savefig('calibration_purchases_vs_holdout_purchases.png')
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close()
    def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(
            self, transaction_data, bgf):
        """Check the 'time_since_last_purchase' calibration-vs-holdout plot.

        Fits ``bgf`` on the calibration columns, draws the plot, then
        verifies the y-values of its first two lines (within atol=0.01), the
        legend entries, the title and both axis labels before closing the
        figure.
        """
        cal_holdout = utils.calibration_and_holdout_data(
            transaction_data, 'id', 'date', '2014-09-01', '2014-12-31')
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        ax = plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout, kind='time_since_last_purchase')

        # Line 0 is checked against the holdout values, line 1 against the
        # model predictions; index [1] of get_data() is used as the y-values.
        plotted = ax.lines
        legend = ax.legend_
        holdout_y = plotted[0].get_data()[1]
        prediction_y = plotted[1].get_data()[1]

        assert_allclose(
            holdout_y,
            [3.954, 3.431, 3.482, 3.484, 2.75, 2.289, 1.968],
            atol=0.01)
        assert_allclose(
            prediction_y,
            [4.345, 2.993, 3.236, 2.677, 2.240, 2.608, 2.430],
            atol=0.01)
        assert_array_equal(
            [entry.get_text() for entry in legend.get_texts()],
            ['frequency_holdout', 'model_predictions'])

        # Title and axis labels.
        assert_equal(
            ax.title.get_text(),
            "Actual Purchases in Holdout Period vs Predicted Purchases")
        assert_equal(
            ax.xaxis.get_label().get_text(),
            "Time since user made last purchase")
        assert_equal(
            ax.yaxis.get_label().get_text(),
            "Average of Purchases in Holdout Period")
        plt.close()
Exemple #10
0
    def test_plot_calibration_purchases_vs_holdout_purchases(
            self, transaction_data, bgf):
        """Check the default calibration-vs-holdout purchases plot.

        Fits ``bgf`` on the calibration columns, draws the plot, then
        verifies the y-values of its first two lines (within atol=0.01), the
        legend entries, the title and both axis labels before closing the
        figure.
        """
        cal_holdout = utils.calibration_and_holdout_data(
            transaction_data, 'id', 'date', '2014-09-01', '2014-12-31')
        bgf.fit(
            cal_holdout['frequency_cal'],
            cal_holdout['recency_cal'],
            cal_holdout['T_cal'],
        )

        ax = plotting.plot_calibration_purchases_vs_holdout_purchases(
            bgf, cal_holdout)

        # Line 0 is checked against the holdout values, line 1 against the
        # model predictions; index [1] of get_data() is used as the y-values.
        plotted = ax.lines
        legend = ax.legend_
        holdout_y = plotted[0].get_data()[1]
        prediction_y = plotted[1].get_data()[1]

        assert_allclose(
            holdout_y,
            [0.161, 0.233, 0.348, 0.544, 0.710, 0.704, 1.606],
            atol=0.01)
        assert_allclose(
            prediction_y,
            [0.270, 0.294, 0.402, 0.422, 0.706, 0.809, 1.019],
            atol=0.01)
        assert_array_equal(
            [entry.get_text() for entry in legend.get_texts()],
            ['frequency_holdout', 'model_predictions'])

        # Title and axis labels.
        assert_equal(
            ax.title.get_text(),
            "Actual Purchases in Holdout Period vs Predicted Purchases")
        assert_equal(
            ax.xaxis.get_label().get_text(),
            "Purchases in calibration period")
        assert_equal(
            ax.yaxis.get_label().get_text(),
            "Average of Purchases in Holdout Period")
        plt.close()
Exemple #11
0

from lifetimes.plotting import plot_period_transactions
# Model validation, part 1: period-transactions plot for the fitted `bgf`
# model (fitted earlier in the script, outside this excerpt).
plot_period_transactions(bgf) 

# Model validation, part 2: split the transactions in `df` into a calibration
# period (ending 2011-06-08) and a holdout period (ending 2011-12-9).
summary_cal_holdout = calibration_and_holdout_data(df, 'CustomerID', 'InvoiceDate',
                                        calibration_period_end='2011-06-08',
                                        observation_period_end='2011-12-9' )   
print(summary_cal_holdout.head())

from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases

# NOTE: this refits `bgf` on the calibration columns only, replacing whatever
# fit it held before; the plot then compares holdout purchases with the
# model's predictions.
bgf.fit(summary_cal_holdout['frequency_cal'], summary_cal_holdout['recency_cal'], summary_cal_holdout['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, summary_cal_holdout)


# Predict the number of purchases a single customer makes in the next t
# periods (the original comment says days).
t = 30
# Row of `modeldata` with index label 12380 (presumably a CustomerID -- confirm).
individual = modeldata.loc[12380]
bgf.predict(t, individual['frequency'], individual['recency'], individual['T'])


from lifetimes.plotting import plot_history_alive
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(12,8)) # figure size (width, height) passed to matplotlib
# NOTE(review): `id` shadows the Python builtin of the same name.
id = 14620  # other customers to try: 18074, 14606
days_since_birth = 365
# All transactions of the selected customer.
sp_trans = df.loc[df['CustomerID'] == id]
Exemple #12
0
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    """Fit purchase/CLV models for one client and upload the results.

    Steps:
      1. Run the templated query (``context['templates_dict']['query']``)
         against Oracle and load the result into a DataFrame with
         lower-cased column names.
      2. Build a calibration/holdout summary (calibration ends 2018-05-24)
         and fit a BetaGeoFitter on the calibration columns.
      3. Save four diagnostic charts as SVG files in the working directory.
      4. Build the full RFM summary, fit a GammaGammaFitter on customers
         with ``frequency > 0`` and compute customer lifetime value.
      5. Write the RFM + CLV table to CSV and upload the CSV and every chart
         to the bucket as ``<src_client_id>/<ds_nodash>/<file name>``.

    Parameters
    ----------
    oracle_conn_id:
        Connection id handed to ``OracleHook``.
    src_client_id:
        Client identifier; used in file names and in the object prefix.
    storage_bucket:
        Destination bucket for the uploads.
    ds:
        Unused here; accepted because the caller passes it (presumably the
        Airflow execution date -- confirm).
    **context:
        Must contain ``templates_dict['query']`` and ``ds_nodash``.

    Returns
    -------
    None
    """
    import matplotlib.pyplot
    # Interactive mode off: figures are only saved to files.
    matplotlib.pyplot.ioff()
    ##
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd

    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    print(src_client_id, context)
    query = context['templates_dict']['query']
    data = pd.read_sql(query, con=conn)
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values#
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')
    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    # Matrix charts: local file names are "<ds_nodash><src_client_id>_<chart>.svg".
    ds_nodash = context.get("ds_nodash")
    chart_prefix = ds_nodash + str(src_client_id)
    plot_period_transactions_chart = (
        chart_prefix + '_plot_period_transactions_chart.svg')
    plot_frequency_recency_chart = (
        chart_prefix + '_plot_frequency_recency_matrix.svg')
    plot_probability_chart = (
        chart_prefix + '_plot_probability_alive_matrix.svg')
    plot_calibration_vs_holdout_chart = (
        chart_prefix + '_plot_calibration_vs_holdout_purchases.svg')

    ax0 = plot_period_transactions(bgf, max_frequency=30)
    ax0.figure.savefig(plot_period_transactions_chart, format='svg')
    ax1 = plot_frequency_recency_matrix(bgf)
    ax1.figure.savefig(plot_frequency_recency_chart, format='svg')
    ax2 = plot_probability_alive_matrix(bgf)
    ax2.figure.savefig(plot_probability_chart, format='svg')
    ax3 = plot_calibration_purchases_vs_holdout_purchases(bgf,
                                                          training_rfm,
                                                          n=50)
    ax3.figure.savefig(plot_calibration_vs_holdout_chart, format='svg')

    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')
    # The Gamma-Gamma model is fitted on returning customers only.
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)
    full_rfm_with_value = full_rfm.join(clv)

    full_rfm_file = ds_nodash + "-src_client_id-" + str(
        src_client_id) + '-icabbi-test.csv'
    full_rfm_with_value.to_csv(full_rfm_file)

    # Upload every artefact under "<src_client_id>/<ds_nodash>/<file name>".
    # Each object is uploaded from its OWN local file: previously every chart
    # upload passed the CSV as `filename`, so the bucket received copies of
    # the CSV under the chart names and the SVGs were never uploaded.
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default')
    object_prefix = str(src_client_id) + "/" + ds_nodash + "/"
    local_files = (
        full_rfm_file,
        plot_period_transactions_chart,
        plot_frequency_recency_chart,
        plot_probability_chart,
        plot_calibration_vs_holdout_chart,
    )
    for local_file in local_files:
        gcs_hook.upload(bucket=storage_bucket,
                        object=object_prefix + local_file,
                        filename=local_file)
Exemple #13
0
            val = np.sqrt(val)
    # Mean Absolute Error
    elif metric == 'mae':
        val = np.sum(np.abs(actuals - predicted)) / actuals.shape[0]
    else:
        val = None
    return val


# Score the model: report the 'mse' metric between the actual holdout
# frequencies and the predicted frequencies.
print('MSE: {0}'.format(
    score_model(combined_data["frequency_holdout"], predicted_freq, 'mse')))

# While important for comparing models, the MSE metric is somewhat harder to interpret in terms of the overall goodness of fit of any single model.
# To get more insight into how well the model fits our data, let's visualize the relationships between some actual and predicted values.
plot_calibration_purchases_vs_holdout_purchases(bgf, combined_data)
plt.show()

plot_period_transactions(bgf)
plt.show()

# Add the predicted frequency values to combined_data.
combined_data["frequency_predict"] = predicted_freq
combined_data.head()

##############################################################
# GAMMA GAMMA MODEL
##############################################################

# To make sure we can use the Gamma-Gamma model, we need to check whether
# frequency and monetary values are correlated. (?)
# Score the holdout predictions with the 'mse' metric and report the result.
mse = score_model(frequency_holdout_actual, frequency_holdout_predicted, 'mse')

print('MSE: {0}'.format(mse))

# COMMAND ----------

# MAGIC %md While important for comparing models, the MSE metric is a bit more challenging to interpret in terms of the overall goodness of fit of any individual model.  To provide more insight into how well our model fits our data, let's visualize the relationships between some actual and predicted values.
# MAGIC
# MAGIC To get started, we can examine how purchase frequencies in the calibration period relates to actual (frequency_holdout) and predicted (model_predictions) frequencies in the holdout period:

# COMMAND ----------

from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases

# Plot holdout purchases against calibration-period purchases for `model`
# using the `input_pd` summary. `figsize` is forwarded as an extra keyword
# argument; `n=90` presumably limits the x-axis range -- confirm against the
# lifetimes documentation.
plot_calibration_purchases_vs_holdout_purchases(model,
                                                input_pd,
                                                n=90,
                                                **{'figsize': (8, 8)})

# NOTE(review): presumably the notebook's display helper, called to render
# the current figure -- confirm.
display()

# COMMAND ----------

# MAGIC %md What we see here is that a higher number of purchases in the calibration period predicts a higher average number of purchases in the holdout period but the actual values diverge sharply from model predictions when we consider customers with a large number of purchases (>60) in the calibration period.  Thinking back to the charts in the data exploration section of this notebook, you might recall that there are very few customers with such a large number of purchases so that this divergence may be a result of a very limited number of instances at the higher end of the frequency range. More data may bring the predicted and actuals back together at this higher end of the curve.  If this divergence persists, it may indicate a range of customer engagement frequency above which we cannot make reliable predictions.
# MAGIC
# MAGIC Using the same method call, we can visualize time since last purchase relative to the average number of purchases in the holdout period. This visualization illustrates that as time since the last purchase increases, the number of purchases in the holdout period decreases.  In other words, those customers we haven't seen in a while aren't likely coming back anytime soon:
# MAGIC
# MAGIC NOTE As before, we will hide the code in the following cells to focus on the visualizations.  Use **Show code** to see the associated Python logic.

# COMMAND ----------

plot_calibration_purchases_vs_holdout_purchases(
Exemple #15
0
                          on='customer_id')

# Binary segment flags. np.where yields 0 where the comparison is True and 1
# otherwise, so rows whose value is NaN (comparison False) also get 1.
# wholesaler: 0 when predicted_cltv < 1000, else 1.
df_final['wholesaler'] = np.where(df_final['predicted_cltv'] < 1000, 0, 1)
# churn_group: 0 when probability_alive < 0.5, else 1.
df_final['churn_group'] = np.where(df_final['probability_alive'] < .5, 0, 1)

# Bare expression: only has a visible effect in a notebook/REPL.
df_final

# Plots and Validation

plot_period_transactions(bgf_mod)

# Split the transactions in `df` into calibration and holdout periods at the
# frequency `frq` (defined outside this excerpt).
cal_hold = calibration_and_holdout_data(
    df,
    'customer_id',
    'date',
    calibration_period_end='2018-12-31',  # 3 years calibration
    observation_period_end='2020-12-31',  # 2 year holdout
    freq=frq)

# Plot the efficacy of the model using the holdout period. A separate
# BetaGeoFitter (`bgf`) is fitted on the calibration columns only; `bgf_mod`
# is left untouched and used for the matrix plots below.
plt.rcParams['figure.figsize'] = (20, 10)
bgf = BetaGeoFitter()
bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, cal_hold)

# Frequency/recency matrix of `bgf_mod` on its own 8x6 figure.
fig = plt.figure(figsize=(8, 6))
plot_frequency_recency_matrix(bgf_mod)

# Probability-alive matrix of `bgf_mod` on its own 8x6 figure.
fig = plt.figure(figsize=(8, 6))
plot_probability_alive_matrix(bgf_mod)