Example #1
0
    def test_plot_period_transactions_parento(self):
        """Plot period transactions for a ParetoNBDFitter fitted on ``cd_data``.

        Returns the current matplotlib figure once the plot has been drawn.
        """
        model = ParetoNBDFitter()
        frequency, recency, age = cd_data['frequency'], cd_data['recency'], cd_data['T']
        model.fit(frequency, recency, age, iterative_fitting=1)

        plt.figure()
        plotting.plot_period_transactions(model)
        return plt.gcf()
Example #2
0
 def test_plot_period_transactions_mbgf(self):
     """Plot period transactions for a ModifiedBetaGeoFitter fitted on ``cd_data``.

     Returns the current matplotlib figure once the plot has been drawn.
     """
     fitter = ModifiedBetaGeoFitter()
     fit_columns = (cd_data['frequency'], cd_data['recency'], cd_data['T'])
     fitter.fit(*fit_columns, iterative_fitting=1)

     plt.figure()
     plotting.plot_period_transactions(fitter)
     return plt.gcf()
Example #3
0
    def test_plot_period_transactions_mbgf(self):
        """Fit a ModifiedBetaGeoFitter on ``cd_data`` and plot its period transactions.

        The current matplotlib figure is returned to the caller.
        """
        model = ModifiedBetaGeoFitter()
        frequency = cd_data['frequency']
        recency = cd_data['recency']
        age = cd_data['T']
        model.fit(frequency, recency, age, iterative_fitting=1)

        plt.figure()
        plotting.plot_period_transactions(model)
        return plt.gcf()
Example #4
0
 def test_plot_period_transactions_parento(self):
     """Fit a ParetoNBDFitter on ``cd_data`` and plot its period transactions.

     The current matplotlib figure is returned to the caller.
     """
     fitter = ParetoNBDFitter()
     fit_columns = (cd_data['frequency'], cd_data['recency'], cd_data['T'])
     fitter.fit(*fit_columns, iterative_fitting=1)

     plt.figure()
     plotting.plot_period_transactions(fitter)
     return plt.gcf()
def evaluation_plots(plot_type):
    """
    Draw one family of evaluation plots for the saved calibration model.

    Evaluation Plots:
    - Tracking Cumulative Transactions
    - Tracking Daily Transactions
    - Frequency of Repeated Transactions
    - Calibration vs Holdout.

    The calibration model is always loaded from "models/calibration_model.pkl".
    Each CSV dataset is read only by the plot type that uses it, so a plot
    type no longer fails because of a dataset it never touches.

    Parameters
    ----------
        plot_type: str.
            "tracking" - Tracking Cumulative and Tracking Daily Transactions.
            "repeated" - Frequency of Repeated Transactions.
            "calibration_holdout" - Calibration vs Holdout Purchases.
            Any other value draws nothing.

    Returns
    -------
        None
    """
    # Loading Calibration Model.
    cal_bg_nbd = BetaGeoFitter(penalizer_coef=0.0)
    cal_bg_nbd.load_model(path="models/calibration_model.pkl")

    if plot_type == "tracking":
        # Only the tracking plots need the raw transactions.
        transactions = pd.read_csv("datasets/transactions.csv")

        # Both tracking plots share every argument except the target axes.
        tracking_kwargs = dict(model=cal_bg_nbd,
                               transactions=transactions,
                               datetime_col="order_purchase_timestamp",
                               customer_id_col="customer_unique_id",
                               t=604,
                               t_cal=512,
                               freq="D")

        fig = plt.figure(figsize=(20, 4))
        plot_cumulative_transactions(ax=fig.add_subplot(121), **tracking_kwargs)
        plot_incremental_transactions(ax=fig.add_subplot(122), **tracking_kwargs)

    elif plot_type == "repeated":
        plot_period_transactions(model=cal_bg_nbd)

    elif plot_type == "calibration_holdout":
        # Only this plot needs the calibration/holdout summary.
        summary_cal_holdout = pd.read_csv("datasets/summary_cal_holdout.csv")
        plot_calibration_purchases_vs_holdout_purchases(
            model=cal_bg_nbd, calibration_holdout_matrix=summary_cal_holdout)
    return
 def viz_bgf(self, t):
     """Save four diagnostic plots of ``self.bgf`` as PNG files.

     ``t`` is forwarded as ``T`` to the frequency/recency matrix plot. Each
     plot is written to a fixed file name in the working directory and the
     current figure is closed before the next plot is drawn.
     """
     def _save_and_close(png_name):
         # Write the current figure to disk, then close it.
         plt.savefig(png_name)
         plt.close()

     model = self.bgf

     # Customer frequency and recency matrix.
     plot_frequency_recency_matrix(model, T=t, cmap='coolwarm')
     _save_and_close('sales_frequency_recency_matrix.png')

     # Customer alive probability.
     plot_probability_alive_matrix(model, cmap='coolwarm')
     _save_and_close('probability_alive_matrix.png')

     # Expected repeat purchases.
     plot_expected_repeat_purchases(model)
     _save_and_close('ProbabilityExpectedRepeatPurchases.png')

     # Expected number of period transactions.
     plot_period_transactions(model)
     _save_and_close('period_transactions.png')
Example #7
0
    def test_plot_period_transactions_labels(self, bgf):
        """Check bar heights, title, axis labels and custom legend labels."""
        # The same seven reference heights appear twice in the expected list.
        expected_heights = [1411, 439, 214, 100, 62, 38, 29] * 2

        axes = plotting.plot_period_transactions(bgf, label=['A', 'B'])

        bar_heights = [bar.get_height() for bar in axes.patches]
        assert_allclose(bar_heights, expected_heights, rtol=0.3)
        assert_equal(axes.title.get_text(), "Frequency of Repeat Transactions")
        assert_equal(axes.xaxis.get_label().get_text(), "Number of Calibration Period Transactions")
        assert_equal(axes.yaxis.get_label().get_text(), "Customers")
        legend_labels = [entry.get_text() for entry in axes.legend_.get_texts()]
        assert_array_equal(legend_labels, ["A", "B"])
        plt.close()
Example #8
0
    def test_plot_period_transactions_max_frequency(self, bgf):
        """Check the plot drawn with ``max_frequency=12``: heights, title, labels, legend."""
        first_half = [1411, 439, 214, 100, 62, 38, 29, 23, 7, 5, 5, 5]
        second_half = [1429, 470, 155, 89, 71, 39, 26, 20, 18, 9, 6, 7]
        expected_heights = first_half + second_half

        axes = plotting.plot_period_transactions(bgf, max_frequency=12)

        bar_heights = [bar.get_height() for bar in axes.patches]
        # Absolute tolerance: there can be large relative differences for small counts.
        assert_allclose(bar_heights, expected_heights, atol=50)
        assert_equal(axes.title.get_text(), "Frequency of Repeat Transactions")
        assert_equal(axes.xaxis.get_label().get_text(), "Number of Calibration Period Transactions")
        assert_equal(axes.yaxis.get_label().get_text(), "Customers")
        legend_labels = [entry.get_text() for entry in axes.legend_.get_texts()]
        assert_array_equal(legend_labels, ["Actual", "Model"])
        plt.close()
Example #9
0
    def test_plot_period_transactions_mbgf(self, cd_data):
        """Check title, axis labels and legend for a ModifiedBetaGeoFitter plot."""
        model = ModifiedBetaGeoFitter()
        fit_columns = (cd_data['frequency'], cd_data['recency'], cd_data['T'])
        model.fit(*fit_columns, iterative_fitting=1)

        axes = plotting.plot_period_transactions(model)

        assert_equal(axes.title.get_text(), "Frequency of Repeat Transactions")
        assert_equal(axes.xaxis.get_label().get_text(), "Number of Calibration Period Transactions")
        assert_equal(axes.yaxis.get_label().get_text(), "Customers")
        legend_labels = [entry.get_text() for entry in axes.legend_.get_texts()]
        assert_array_equal(legend_labels, ["Actual", "Model"])
        plt.close()
 def test_plot_period_transactions(self):
     """Draw the period-transactions plot three ways, then show the figures.

     The variants are: default arguments, ``max_frequency=12``, and custom
     series labels.
     """
     from matplotlib import pyplot as plt

     variants = ({}, {'max_frequency': 12}, {'label': ['A', 'B']})
     for extra_kwargs in variants:
         plotting.plot_period_transactions(bgf, **extra_kwargs)
     plt.show()
Example #11
0
    def test_plot_period_transactions(self):
        """Call ``plot_period_transactions`` with three argument sets and show the result.

        Covers the default call, ``max_frequency=12`` and ``label=['A', 'B']``.
        """
        from matplotlib import pyplot as plt

        plot = plotting.plot_period_transactions
        for options in (dict(), dict(max_frequency=12), dict(label=['A', 'B'])):
            plot(bgf, **options)
        plt.show()
Example #12
0
    def test_plot_period_transactions(self):
        """Plot period transactions for ``BG`` three ways, each after a new figure.

        Covers the default call, ``bins=range(5)`` and ``label=['A', 'B']``;
        ``plt.show()`` is called once at the end.
        """
        from matplotlib import pyplot as plt

        variants = ({}, {'bins': range(5)}, {'label': ['A', 'B']})
        for extra_kwargs in variants:
            plt.figure()
            plotting.plot_period_transactions(BG, **extra_kwargs)
        plt.show()
Example #13
0
    def test_plot_period_transactions_mbgf(self, cd_data):
        """Check heights, title, labels and legend for a ModifiedBetaGeoFitter plot."""
        first_half = [1411, 439, 214, 100, 62, 38, 29]
        second_half = [1427, 410, 211, 118, 56, 47, 29]
        expected_heights = first_half + second_half

        model = ModifiedBetaGeoFitter()
        fit_columns = (cd_data['frequency'], cd_data['recency'], cd_data['T'])
        model.fit(*fit_columns, iterative_fitting=1)

        axes = plotting.plot_period_transactions(model)

        bar_heights = [bar.get_height() for bar in axes.patches]
        assert_allclose(bar_heights, expected_heights, rtol=0.3)
        assert_equal(axes.title.get_text(), "Frequency of Repeat Transactions")
        x_label = axes.xaxis.get_label().get_text()
        assert_equal(x_label, "Number of Calibration Period Transactions")
        assert_equal(axes.yaxis.get_label().get_text(), "Customers")
        legend_labels = [entry.get_text() for entry in axes.legend_.get_texts()]
        assert_array_equal(legend_labels, ["Actual", "Model"])
        plt.close()
Example #14
0
    def test_plot_period_transactions_pareto(self, cd_data):
        """Check heights, title, labels and legend for a ParetoNBDFitter plot."""
        first_half = [1411, 439, 214, 100, 62, 38, 29]
        second_half = [1199, 330, 160, 100, 64, 47, 34]
        expected_heights = first_half + second_half

        model = ParetoNBDFitter()
        fit_columns = (cd_data['frequency'], cd_data['recency'], cd_data['T'])
        model.fit(*fit_columns, iterative_fitting=1)

        axes = plotting.plot_period_transactions(model)

        bar_heights = [bar.get_height() for bar in axes.patches]
        assert_allclose(bar_heights, expected_heights, rtol=0.3)
        assert_equal(axes.title.get_text(), "Frequency of Repeat Transactions")
        x_label = axes.xaxis.get_label().get_text()
        assert_equal(x_label, "Number of Calibration Period Transactions")
        assert_equal(axes.yaxis.get_label().get_text(), "Customers")
        legend_labels = [entry.get_text() for entry in axes.legend_.get_texts()]
        assert_array_equal(legend_labels, ["Actual", "Model"])
        plt.close()
# Interactive cells ("#%%" markers) exploring an already-fitted model `bgf`
# and a DataFrame `X_train`; both are defined outside this excerpt.
from lifetimes.plotting import plot_frequency_recency_matrix

# Frequency/recency matrix of the fitted model.
plot_frequency_recency_matrix(bgf)

#%%
from lifetimes.plotting import plot_probability_alive_matrix

# Probability-alive matrix of the fitted model.
f=plot_probability_alive_matrix(bgf)

# Predicted purchases over t periods for every row of X_train.
# NOTE(review): recency and T are divided by 7, presumably converting days to
# weeks so that t=52 means one year - confirm the model was fitted in weeks.
t=52
X_train['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, X_train['txn_total'], X_train['recency_true']/7,
    X_train['T']/7)
#%%
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)

#%%
# Bare expression: the sorted frame is only displayed, X_train is not modified.
X_train.sort_values('predicted_purchases')
#%%
# X_train.sort_values(by='predicted_purchases').head(5)
# NOTE(review): this cell repeats the import and plot from the cell two above.
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)
#%%
import matplotlib.pyplot as plt

# Create a figure, draw the frequency/recency matrix, then save that figure as a PDF.
f = plt.figure()
plot_frequency_recency_matrix(bgf)
f.savefig("foo.pdf", bbox_inches='tight')

#%%
Example #16
0
# Histogram of the FREQUENCY column (`df` is defined outside this excerpt).
df["FREQUENCY"].plot(kind="hist", bins=50)

# ==========================================================================
# BG/NBD model
# ==========================================================================

# Fit a BetaGeoFitter with penalizer_coef=0.01 on the FREQUENCY / RECENCY / T columns.
bgf = BetaGeoFitter(penalizer_coef=0.01)
bgf.fit(df["FREQUENCY"], df["RECENCY"], df["T"])

# Bare expression: only displays the summary in an interactive session.
bgf.summary

plotting.plot_frequency_recency_matrix(bgf)
plotting.plot_probability_alive_matrix(bgf)

# Repeat transaction model check
plotting.plot_period_transactions(bgf)

# ==========================================================================
# Ranking reps from best to worst
# ==========================================================================

# Expected purchases over the next t periods, stored as a new column.
t = 1
df["predicted_purchases"] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, df["FREQUENCY"], df["RECENCY"], df["T"])
# Bare expression: shows the 10 rows with the highest predicted purchases.
df.sort_values(by="predicted_purchases").tail(10)

# ==========================================================================
# Gamma-Gamma model
# The model assumes that there is no relationship between the monetary value
# and the purchase frequency.
# ==========================================================================
Example #17
0
 def test_plot_period_transactions_labels(self):
     """Plot period transactions with custom series labels; return the current figure."""
     series_labels = ['A', 'B']
     plt.figure()
     plotting.plot_period_transactions(bgf, label=series_labels)
     return plt.gcf()
Example #18
0
 def test_plot_period_transactions_max_frequency(self):
     """Plot period transactions with ``max_frequency=12``; return the current figure."""
     plot_options = {'max_frequency': 12}
     plt.figure()
     plotting.plot_period_transactions(bgf, **plot_options)
     return plt.gcf()
Example #19
0
 def test_plot_period_transactions(self):
     """Plot period transactions with default arguments; return the current figure."""
     plt.figure()
     plotting.plot_period_transactions(bgf)
     current_figure = plt.gcf()
     return current_figure
Example #20
0
# Report the mean of the monetary_value column (`rc`, `df`, `rfm_cluster`,
# `bgf_mod` and `frq` are defined outside this excerpt).
print(f"Actual Average Sales: {rc['monetary_value'].mean()}")

# One row per email, then attach email and cluster id to the per-customer frame.
df2 = df.drop_duplicates(subset=['email'])
df2.shape  # bare expression: only displays in an interactive session
df_final = rc.merge(df2[['customer_id', 'email']], on='customer_id')
df_final = df_final.merge(rfm_cluster[['customer_id', 'clusterID']],
                          on='customer_id')

# Binary flags: 0 where the comparison holds, 1 otherwise. Rows where the
# comparison is False because the value is NaN therefore also get 1.
df_final['wholesaler'] = np.where(df_final['predicted_cltv'] < 1000, 0, 1)
df_final['churn_group'] = np.where(df_final['probability_alive'] < .5, 0, 1)

# Bare expression: only displays the frame in an interactive session.
df_final

# Plots and Validation

plot_period_transactions(bgf_mod)

# Split the transactions into a calibration and a holdout period.
cal_hold = calibration_and_holdout_data(
    df,
    'customer_id',
    'date',
    calibration_period_end='2018-12-31',  # 3 years calibration
    observation_period_end='2020-12-31',  # 2 year holdout
    freq=frq)

# Plot the efficacy of the model using the hold-out period: refit a fresh
# BetaGeoFitter on the calibration columns only, then compare with the holdout.
plt.rcParams['figure.figsize'] = (20, 10)
bgf = BetaGeoFitter()
bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, cal_hold)
Example #21
0
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    """Fit purchase and monetary models on Oracle data and upload the results.

    Runs the query found in ``context['templates_dict']['query']``, fits a
    ``BetaGeoFitter`` on the calibration columns, saves four diagnostic charts
    as SVG files, fits a ``GammaGammaFitter`` on the rows with
    ``frequency > 0``, writes the per-customer lifetime values to a CSV file,
    and uploads the CSV and the four charts to ``storage_bucket``.

    The query result must contain the columns ``src_user_id``, ``pickup_date``
    and ``price_total`` (column names are lower-cased after loading).

    Parameters
    ----------
    oracle_conn_id
        Connection id handed to ``OracleHook``.
    src_client_id
        Client identifier; used in the output file names and object paths.
    storage_bucket
        Bucket the CSV and the charts are uploaded to.
    ds
        Not used by this function; kept so existing callers keep working.
    **context
        Must provide ``templates_dict`` (with a ``query`` entry) and
        ``ds_nodash``.

    Returns
    -------
    None
    """
    import matplotlib.pyplot
    matplotlib.pyplot.ioff()
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd

    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    try:
        print(src_client_id, context)
        query = context['templates_dict']['query']
        data = pd.read_sql(query, con=conn)
    finally:
        # The connection is only needed for the query; release it even on failure.
        conn.close()
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values#
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')
    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    # Matrix charts
    ds_nodash = context.get("ds_nodash")
    chart_prefix = ds_nodash + str(src_client_id)
    plot_period_transactions_chart = (
        chart_prefix + '_plot_period_transactions_chart.svg')
    plot_frequency_recency_chart = (
        chart_prefix + '_plot_frequency_recency_matrix.svg')
    plot_probability_chart = (
        chart_prefix + '_plot_probability_alive_matrix.svg')
    plot_calibration_vs_holdout_chart = (
        chart_prefix + '_plot_calibration_vs_holdout_purchases.svg')

    def _save_chart(ax, chart_file):
        # Save the chart as SVG, then close its figure so the next chart is
        # drawn on its own figure and open figures do not pile up.
        ax.figure.savefig(chart_file, format='svg')
        matplotlib.pyplot.close(ax.figure)

    _save_chart(plot_period_transactions(bgf, max_frequency=30),
                plot_period_transactions_chart)
    _save_chart(plot_frequency_recency_matrix(bgf),
                plot_frequency_recency_chart)
    _save_chart(plot_probability_alive_matrix(bgf),
                plot_probability_chart)
    _save_chart(
        plot_calibration_purchases_vs_holdout_purchases(bgf,
                                                        training_rfm,
                                                        n=50),
        plot_calibration_vs_holdout_chart)

    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)
    full_rfm_with_value = full_rfm.join(clv)

    full_rfm_file = ds_nodash + "-src_client_id-" + str(
        src_client_id) + '-icabbi-test.csv'
    full_rfm_with_value.to_csv(full_rfm_file)

    # Upload every output under "<client>/<ds_nodash>/<file name>". Each
    # object is uploaded from its own local file; previously every chart
    # object received the CSV contents instead of the chart.
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default')
    object_prefix = str(src_client_id) + "/" + ds_nodash + "/"
    output_files = (
        full_rfm_file,
        plot_period_transactions_chart,
        plot_frequency_recency_chart,
        plot_probability_chart,
        plot_calibration_vs_holdout_chart,
    )
    for local_file in output_files:
        gcs_hook.upload(bucket=storage_bucket,
                        object=object_prefix + local_file,
                        filename=local_file)
Example #22
0
 def test_plot_period_transactions_labels(self):
     """Draw the plot with labels 'A' and 'B' and hand back the current figure."""
     plot_options = {'label': ['A', 'B']}
     plt.figure()
     plotting.plot_period_transactions(bgf, **plot_options)
     return plt.gcf()
Example #23
0
 def test_plot_period_transactions_max_frequency(self):
     """Draw the plot capped at ``max_frequency=12`` and hand back the current figure."""
     frequency_cap = 12
     plt.figure()
     plotting.plot_period_transactions(bgf, max_frequency=frequency_cap)
     return plt.gcf()
def visualizePlotPeriodTransaction(betaGeoFitterModel):
    """Plot period transactions for the model and save the figure.

    The current figure is written to "PeriodTransactionPlot.png" in the
    working directory. Nothing is returned.
    """
    output_file = "PeriodTransactionPlot.png"
    plot_period_transactions(betaGeoFitterModel)
    pylab.savefig(output_file)
Example #25
0
 def test_plot_period_transactions(self):
     """Draw the default period-transactions plot and hand back the current figure."""
     plt.figure()
     plotting.plot_period_transactions(bgf)
     result_figure = plt.gcf()
     return result_figure