예제 #1
0
    def test_plot_frequency_recency_matrix(self):
        """Draw the frequency/recency matrix twice, then show the figures.

        The first call uses the plotting defaults; the second passes explicit
        ``max_recency`` and ``max_frequency`` values.
        """
        import matplotlib.pyplot as plt

        keyword_variants = (
            {},
            {"max_recency": 100, "max_frequency": 50},
        )
        for extra_kwargs in keyword_variants:
            # Open a new figure before every plotting call.
            plt.figure()
            plotting.plot_frequency_recency_matrix(bgf, **extra_kwargs)

        plt.show()
예제 #2
0
    def test_plot_frequency_recency_matrix(self):
        """Draw the frequency/recency matrix twice, then show the figures.

        The first call uses the plotting defaults; the second passes explicit
        ``max_t`` and ``max_x`` values.
        """
        import matplotlib.pyplot as plt

        keyword_variants = (
            {},
            {"max_t": 100, "max_x": 50},
        )
        for extra_kwargs in keyword_variants:
            # Open a new figure before every plotting call.
            plt.figure()
            plotting.plot_frequency_recency_matrix(BG, **extra_kwargs)

        plt.show()
예제 #3
0
    def test_plot_frequency_recency_matrix_max_frequency_max_recency(
            self, bgf):
        """Check the matrix plot when both axis limits are set to 100.

        Compares the image array shape, one row of its values (absolute
        tolerance 0.01), the title and both axis labels, then closes the
        current figure.
        """
        expected_shape = (101, 101)
        checked_row = 95
        expected_row = [
            0.002, 0.008, 0.017, 0.025, 0.034, 0.043, 0.052, 0.060, 0.069,
            0.078, 0.087, 0.096, 0.105, 0.114, 0.123, 0.132, 0.140, 0.149,
            0.158, 0.166, 0.175, 0.184, 0.192, 0.201, 0.209, 0.218, 0.226,
            0.235, 0.243, 0.251, 0.259, 0.267, 0.275, 0.283, 0.291, 0.299,
            0.307, 0.314, 0.322, 0.330, 0.337, 0.344, 0.352, 0.359, 0.366,
            0.373, 0.379, 0.386, 0.393, 0.399, 0.405, 0.411, 0.417, 0.423,
            0.429, 0.435, 0.440, 0.445, 0.450, 0.455, 0.460, 0.465, 0.469,
            0.473, 0.477, 0.481, 0.484, 0.488, 0.491, 0.494, 0.497, 0.499,
            0.501, 0.503, 0.505, 0.506, 0.508, 0.509, 0.509, 0.510, 0.510,
            0.510, 0.510, 0.509, 0.508, 0.507, 0.506, 0.504, 0.503, 0.501,
            0.498, 0.496, 0.493, 0.490, 0.486, 0.483, 0.479, 0.475, 0.471,
            0.466, 0.462
        ]
        expected_title = "Expected Number of Future Purchases for 1 Unit of Time,\nby Frequency and Recency of a Customer"

        ax = plotting.plot_frequency_recency_matrix(bgf,
                                                    max_frequency=100,
                                                    max_recency=100)
        image = ax.get_images()[0].get_array()

        assert_array_equal(image.shape, expected_shape)
        # Only a single row is compared, for brevity.
        assert_allclose(image[checked_row, :].data, expected_row, atol=0.01)
        assert_equal(ax.title.get_text(), expected_title)
        assert_equal(ax.xaxis.get_label().get_text(),
                     "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()
예제 #4
0
    def test_plot_frequency_recency_matrix_max_frequency(self, bgf):
        """Check the matrix plot when only ``max_frequency`` is set to 100.

        Compares the image array shape, one row of its values (absolute
        tolerance 0.01), the title and both axis labels, then closes the
        current figure.
        """
        expected_shape = (39, 101)
        checked_row = 35
        expected_row = [
            0.005, 0.021, 0.041, 0.061, 0.082, 0.103, 0.125, 0.146, 0.167,
            0.188, 0.208, 0.229, 0.250, 0.270, 0.290, 0.310, 0.330, 0.349,
            0.369, 0.388, 0.406, 0.425, 0.443, 0.460, 0.478, 0.495, 0.511,
            0.528, 0.543, 0.559, 0.573, 0.587, 0.601, 0.614, 0.627, 0.639,
            0.650, 0.660, 0.670, 0.679, 0.688, 0.695, 0.702, 0.708, 0.713,
            0.718, 0.721, 0.724, 0.726, 0.727, 0.727, 0.726, 0.724, 0.721,
            0.718, 0.713, 0.708, 0.702, 0.695, 0.687, 0.679, 0.670, 0.660,
            0.649, 0.638, 0.627, 0.615, 0.602, 0.589, 0.575, 0.562, 0.548,
            0.533, 0.519, 0.504, 0.489, 0.475, 0.460, 0.445, 0.430, 0.416,
            0.401, 0.387, 0.372, 0.359, 0.345, 0.331, 0.318, 0.305, 0.293,
            0.280, 0.269, 0.257, 0.246, 0.235, 0.224, 0.214, 0.204, 0.195,
            0.186, 0.177
        ]
        expected_title = "Expected Number of Future Purchases for 1 Unit of Time,\nby Frequency and Recency of a Customer"

        ax = plotting.plot_frequency_recency_matrix(bgf, max_frequency=100)
        image = ax.get_images()[0].get_array()

        assert_array_equal(image.shape, expected_shape)
        # Only a single row is compared, for brevity.
        assert_allclose(image[checked_row, :].data, expected_row, atol=0.01)
        assert_equal(ax.title.get_text(), expected_title)
        assert_equal(ax.xaxis.get_label().get_text(),
                     "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()
예제 #5
0
    def test_plot_frequency_recency_matrix_max_recency(self, bgf):
        """Check the matrix plot when only ``max_recency`` is set to 100.

        Compares the image array shape, one column of its values (absolute
        tolerance 0.01), the title and both axis labels, then closes the
        current figure.
        """
        expected_shape = (101, 30)
        checked_col = 25
        expected_col = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0.001, 0.001, 0.002, 0.002, 0.004, 0.005, 0.007, 0.010,
            0.014, 0.018, 0.024, 0.032, 0.041, 0.052, 0.065, 0.080, 0.096,
            0.112, 0.129, 0.145, 0.160, 0.174, 0.186, 0.196, 0.205, 0.212,
            0.218, 0.222, 0.226, 0.229, 0.232, 0.233
        ]
        expected_title = "Expected Number of Future Purchases for 1 Unit of Time,\nby Frequency and Recency of a Customer"

        ax = plotting.plot_frequency_recency_matrix(bgf, max_recency=100)
        image = ax.get_images()[0].get_array()

        assert_array_equal(image.shape, expected_shape)
        # Only a single column is compared, for brevity.
        assert_allclose(image[:, checked_col].data, expected_col, atol=0.01)
        assert_equal(ax.title.get_text(), expected_title)
        assert_equal(ax.xaxis.get_label().get_text(),
                     "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()
 def viz_bgf(self, t):
     """Draw four charts for ``self.bgf`` and save each one as a PNG file.

     Args:
         t: Forwarded as ``T`` to ``plot_frequency_recency_matrix``.
     """
     # (callable that draws the chart, file the chart is saved to), in order.
     charts = (
         # customer frequency and recency matrix
         (lambda: plot_frequency_recency_matrix(self.bgf, T=t, cmap='coolwarm'),
          'sales_frequency_recency_matrix.png'),
         # customer alive probability
         (lambda: plot_probability_alive_matrix(self.bgf, cmap='coolwarm'),
          'probability_alive_matrix.png'),
         # expected repeat purchases
         (lambda: plot_expected_repeat_purchases(self.bgf),
          'ProbabilityExpectedRepeatPurchases.png'),
         # expected number of period transactions
         (lambda: plot_period_transactions(self.bgf),
          'period_transactions.png'),
     )
     for draw_chart, png_name in charts:
         draw_chart()
         plt.savefig(png_name)
         plt.close()
예제 #7
0
    def test_plot_frequency_recency_matrix(self, bgf):
        """Check the matrix plot produced with the default arguments.

        Compares the image array shape, one row of its values (absolute
        tolerance 0.01), the title and both axis labels, then closes the
        current figure.
        """
        expected_shape = (39, 30)
        checked_row = 29
        expected_row = [0.005, 0.020, 0.037, 0.054, 0.070, 0.085, 0.099, 0.110, 0.120, 0.127, 0.133,
                        0.136, 0.136, 0.135, 0.131, 0.125, 0.119, 0.111, 0.102, 0.093, 0.084, 0.075,
                        0.066, 0.058, 0.050, 0.044, 0.038, 0.032, 0.027, 0.023]
        expected_title = "Expected Number of Future Purchases for 1 Unit of Time,\nby Frequency and Recency of a Customer"

        ax = plotting.plot_frequency_recency_matrix(bgf)
        image = ax.get_images()[0].get_array()

        assert_array_equal(image.shape, expected_shape)
        # Only a single row is compared, for brevity.
        assert_allclose(image[checked_row, :].data, expected_row, atol=0.01)
        assert_equal(ax.title.get_text(), expected_title)
        assert_equal(ax.xaxis.get_label().get_text(), "Customer's Historical Frequency")
        assert_equal(ax.yaxis.get_label().get_text(), "Customer's Recency")
        plt.close()
예제 #8
0
# Load the dataset from the CSV file and preview its first rows.
clv = pd.read_csv("C:\\data\\ecommercesales.csv")
clv.head(6)

# #### Step 3: Transform transactional data for CLV analysis

# In[14]:

from lifetimes.utils import summary_data_from_transaction_data
# Build the summary table from the raw transactions; its 'frequency',
# 'recency' and 'T' columns are what the model is fitted on below.
clvsum = summary_data_from_transaction_data(
    clv, 'InvoiceDate', 'CustID', observation_period_end='2016-01-01')
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare ``print x`` statement is a SyntaxError on Python 3.
print(clvsum.head(100))

# In[ ]:

print(clvsum.tail())

# Step 4: Fit data to the Beta-geometric / NBD model

# In[4]:

Betageo = BetaGeoFitter()
Betageo.fit(clvsum['frequency'], clvsum['recency'], clvsum['T'])

# ### Frequency / Recency Matrix

# In[5]:

from lifetimes.plotting import plot_frequency_recency_matrix
plot_frequency_recency_matrix(Betageo)
예제 #9
0
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    """Fit purchase and spend models for one client and upload the results.

    Steps, in order:

    1. Run the templated SQL query against Oracle and load the result into a
       DataFrame (column names are lower-cased).
    2. Build calibration/holdout RFM data and fit a ``BetaGeoFitter`` on the
       calibration columns.
    3. Save four diagnostic charts as SVG files (relative paths).
    4. Build the full RFM summary, fit a ``GammaGammaFitter`` on the rows
       with ``frequency > 0`` and compute the customer lifetime value.
    5. Write the RFM summary joined with the CLV to a CSV file and upload the
       CSV and the four charts to ``storage_bucket`` under
       ``<src_client_id>/<ds_nodash>/``.

    Args:
        oracle_conn_id: Connection id handed to ``OracleHook``.
        src_client_id: Client identifier; used in the local file names and
            in the object prefix of every upload.
        storage_bucket: Name of the destination bucket.
        ds: Not used by this function; kept so callers can keep passing it.
        **context: Must provide ``templates_dict['query']`` (the SQL to run)
            and ``ds_nodash`` (used in file names and the object prefix).
    """
    import matplotlib.pyplot
    matplotlib.pyplot.ioff()
    ##
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes.plotting import plot_history_alive
    from lifetimes.plotting import plot_cumulative_transactions
    from lifetimes.utils import expected_cumulative_transactions
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd
    import datalab.storage as gcs

    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    try:
        print(src_client_id, context)
        query = context['templates_dict']['query']
        data = pd.read_sql(query, con=conn)
    finally:
        # The connection is only needed for this one query; release it even
        # when the lookup or the query fails.
        conn.close()
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values#
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')
    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    # Matrix charts
    ds_nodash = context.get("ds_nodash")
    chart_prefix = ds_nodash + str(src_client_id)
    plot_period_transactions_chart = (
        chart_prefix + '_plot_period_transactions_chart.svg')
    plot_frequency_recency_chart = (
        chart_prefix + '_plot_frequency_recency_matrix.svg')
    plot_probability_chart = (
        chart_prefix + '_plot_probability_alive_matrix.svg')
    plot_calibration_vs_holdout_chart = (
        chart_prefix + '_plot_calibration_vs_holdout_purchases.svg')

    # (output file, callable returning the Axes of the drawn chart), in order.
    charts = (
        (plot_period_transactions_chart,
         lambda: plot_period_transactions(bgf, max_frequency=30)),
        (plot_frequency_recency_chart,
         lambda: plot_frequency_recency_matrix(bgf)),
        (plot_probability_chart,
         lambda: plot_probability_alive_matrix(bgf)),
        (plot_calibration_vs_holdout_chart,
         lambda: plot_calibration_purchases_vs_holdout_purchases(
             bgf, training_rfm, n=50)),
    )
    for chart_file, draw_chart in charts:
        ax = draw_chart()
        ax.figure.savefig(chart_file, format='svg')
        # pyplot keeps every figure alive until it is closed; close it so
        # the next chart starts from a clean state and nothing accumulates.
        matplotlib.pyplot.close(ax.figure)

    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)
    full_rfm_with_value = full_rfm.join(clv)

    full_rfm_file = ds_nodash + "-src_client_id-" + str(
        src_client_id) + '-icabbi-test.csv'
    full_rfm_with_value.to_csv(full_rfm_file)

    # Upload every artifact from its own local file, so that each chart
    # object holds its SVG rather than a copy of the CSV.
    object_prefix = str(src_client_id) + "/" + ds_nodash + "/"
    storage_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default')
    artifacts = [full_rfm_file] + [chart_file for chart_file, _ in charts]
    for local_file in artifacts:
        storage_hook.upload(
            bucket=storage_bucket,
            object=object_prefix + local_file,
            filename=local_file)
예제 #10
0
 def test_plot_frequency_recency_matrix_max_frequency_max_recency(self):
     """Plot the matrix with both limits set to 100; return the current figure."""
     axis_limits = {"max_frequency": 100, "max_recency": 100}
     plt.figure()
     plotting.plot_frequency_recency_matrix(bgf, **axis_limits)
     current_figure = plt.gcf()
     return current_figure
예제 #11
0
 def test_plot_frequency_recency_matrix_max_frequency_max_recency(self):
     """Plot the matrix with both limits set to 100; return the current figure."""
     axis_limits = {"max_frequency": 100, "max_recency": 100}
     plt.figure()
     plotting.plot_frequency_recency_matrix(bgf, **axis_limits)
     current_figure = plt.gcf()
     return current_figure
예제 #12
0
파일: lifetimes.py 프로젝트: tqizzle/test
import lifetimes
from lifetimes import BetaGeoFitter
from lifetimes.plotting import plot_frequency_recency_matrix
from lifetimes.plotting import plot_probability_alive_matrix

import pandas as pd

# Load the input table; its 'frequency', 'recency' and 'T' columns are read
# below to fit the model.
data = pd.read_csv('lifetimes')


bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(data['frequency'], data['recency'], data['T'])

# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare ``print bgf`` statement is a SyntaxError on Python 3.
print(bgf)

plot_frequency_recency_matrix(bgf)

# Earlier variant kept for reference: it plotted ``bgf`` instead of ``model``.
#plot_probability_alive_matrix(bgf)
# NOTE(review): ``model`` is not defined in this fragment; it must come from
# an earlier cell.
plot_probability_alive_matrix(model)

# NOTE(review): display() is not defined in this fragment — presumably the
# notebook helper that renders the current figure; confirm.
display()

# COMMAND ----------

# MAGIC %md In addition to predicting the probability a customer is still alive, we can calculate the number of purchases expected from a customer over a given future time interval, such as over the next 30-days:

# COMMAND ----------

from lifetimes.plotting import plot_frequency_recency_matrix

# set figure size
plt.subplots(figsize=(12, 8))

# T=30 matches the 30-day interval described in the markdown cell above.
plot_frequency_recency_matrix(model, T=30)

display()

# COMMAND ----------

# MAGIC %md As before, we can calculate this expected number of purchases for each customer based on their current metrics:

# COMMAND ----------

# Store the per-row expected purchase count for a horizon of 30, computed
# from each row's frequency, recency and T values.
filtered_pd['purchases_next30days'] = (
    model.conditional_expected_number_of_purchases_up_to_time(
        30, filtered_pd['frequency'], filtered_pd['recency'],
        filtered_pd['T']))

filtered_pd.head(10)
예제 #14
0
                          on='customer_id')

# 'wholesaler' is 0 when predicted_cltv is below 1000, otherwise 1.
below_cltv_cutoff = df_final['predicted_cltv'] < 1000
df_final['wholesaler'] = np.where(below_cltv_cutoff, 0, 1)
# 'churn_group' is 0 when probability_alive is below 0.5, otherwise 1.
below_alive_cutoff = df_final['probability_alive'] < .5
df_final['churn_group'] = np.where(below_alive_cutoff, 0, 1)

df_final

# Plots and Validation

plot_period_transactions(bgf_mod)

split_options = dict(
    calibration_period_end='2018-12-31',  # 3 years calibration
    observation_period_end='2020-12-31',  # 2 year holdout
    freq=frq,
)
cal_hold = calibration_and_holdout_data(df, 'customer_id', 'date',
                                        **split_options)

# Plot the efficacy of the model using the hold-out period: a fresh model is
# fitted on the calibration columns only.
plt.rcParams['figure.figsize'] = (20, 10)
bgf = BetaGeoFitter()
calibration_columns = ('frequency_cal', 'recency_cal', 'T_cal')
bgf.fit(*[cal_hold[column] for column in calibration_columns])
plot_calibration_purchases_vs_holdout_purchases(bgf, cal_hold)

# Each matrix chart is preceded by a new 8x6 figure.
fig = plt.figure(figsize=(8, 6))
plot_frequency_recency_matrix(bgf_mod)

fig = plt.figure(figsize=(8, 6))
plot_probability_alive_matrix(bgf_mod)
예제 #15
0
           fit_method='Nelder-Mead')
mbgnbd.summary

# In[8]:

from lifetimes import BetaGeoFitter
# Fit a BetaGeoFitter on the frequency, recency and T columns.
bgf = BetaGeoFitter(penalizer_coef=0.0001)
fit_inputs = [customer_detail[column]
              for column in ('frequency', 'recency', 'T')]
bgf.fit(*fit_inputs)
bgf.summary

# In[9]:

#from lifetimes.plotting import plot_probability_alive_matrix
from lifetimes.plotting import plot_frequency_recency_matrix
# NOTE(review): this plots ``mbgnbd``, while the cells below use ``bgf``.
plot_frequency_recency_matrix(mbgnbd)

# In[10]:

from lifetimes.plotting import plot_period_transactions
plot_period_transactions(bgf)

# In[11]:

t = 90  # days to predict in the future
expected_purchases = bgf.conditional_expected_number_of_purchases_up_to_time(
    t,
    customer_detail['frequency'],
    customer_detail['recency'],
    customer_detail['T'],
)
customer_detail['pred_90d_bgf'] = expected_purchases
# Show the five rows with the highest prediction.
customer_detail.sort_values(by='pred_90d_bgf').tail(5)
예제 #16
0
파일: Main.py 프로젝트: colllz/CLV
#modeldata['frequency'].plot(kind='hist', bins=50)
#print(modeldata['frequency'].describe())
# Share of customers with no [repeat] order (frequency equal to 0).
no_repeat_count = sum(modeldata['frequency'] == 0)
print(no_repeat_count/float(len(modeldata)))
#dfnew[dfnew.CustomerID == 12346.0]

from lifetimes import BetaGeoFitter
# Fit the model on frequency, recency and T; penalizer_coef is passed as 0.0.
bgf = BetaGeoFitter(penalizer_coef=0.0)
fit_columns = [modeldata[column] for column in ('frequency', 'recency', 'T')]
bgf.fit(*fit_columns)
print(bgf)

# Frequency/recency matrix (plot for predicted purchases).
from lifetimes.plotting import plot_frequency_recency_matrix

plot_frequency_recency_matrix(bgf)

from lifetimes.plotting import plot_probability_alive_matrix

# NOTE(review): despite its name, this column stores the value returned by
# conditional_probability_alive, i.e. the probability of being alive.
alive_probability = bgf.conditional_probability_alive(
    modeldata['frequency'], modeldata['recency'], modeldata['T'])
modeldata['Churn_probability'] = alive_probability

# Plot of the probability of being alive.
fig = plt.figure(figsize=(12,8))
plot_probability_alive_matrix(bgf)


# Predict the number of purchases each customer will make.
t = 30  # number of days to predict purchases for
predicted_purchases = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, modeldata['frequency'], modeldata['recency'], modeldata['T'])
modeldata['predicted_num_purchases'] = predicted_purchases
# Show the five rows with the highest prediction.
modeldata.sort_values(by='predicted_num_purchases').tail(5)
def visualizeFrequencyRecencyMatrix(betaGeoFitterModel):
    """Plot the frequency/recency matrix of the model and save it as a PNG.

    Args:
        betaGeoFitterModel: Model passed to ``plot_frequency_recency_matrix``.
    """
    output_file = "FrequencyRecencyMatrixPlot.png"
    plot_frequency_recency_matrix(betaGeoFitterModel)
    pylab.savefig(output_file)
예제 #18
0
 def test_plot_frequency_recency_matrix(self):
     """Plot the matrix with default arguments; return the current figure."""
     plt.figure()
     plotting.plot_frequency_recency_matrix(bgf)
     current_figure = plt.gcf()
     return current_figure
예제 #19
0
summary['monetary_value'].astype(int).head()

from lifetimes import BetaGeoFitter

# Fit the model on the frequency, recency and T columns of the summary.
bgf = BetaGeoFitter(penalizer_coef=0.0)
fit_inputs = [summary[column] for column in ('frequency', 'recency', 'T')]
bgf.fit(*fit_inputs)
print(bgf)

bgf.summary

from lifetimes.plotting import plot_frequency_recency_matrix
import matplotlib.pyplot as plt

# Square figures for the two matrix charts.
plt.rcParams['figure.figsize'] = [10, 10]

plot_frequency_recency_matrix(bgf, title="")

from lifetimes.plotting import plot_probability_alive_matrix

plot_probability_alive_matrix(bgf, title="")

t = 12
expected_purchases = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, summary['frequency'], summary['recency'], summary['T'])
summary['predicted_purchases'] = expected_purchases
# Show the ten rows with the highest prediction.
summary.sort_values(by='predicted_purchases').tail(10)

from lifetimes.plotting import plot_period_transactions
# Wide, short figure for the period-transactions chart.
plt.rcParams['figure.figsize'] = [12, 3]

plot_period_transactions(bgf)
예제 #20
0
#%%
# Write X_test to 'matrix.csv'.
X_test.to_csv('matrix.csv')

#%%
from lifetimes import BetaGeoFitter

# similar API to scikit-learn and lifelines.
# NOTE(review): recency_true and T are divided by 7 before fitting —
# presumably converting days to weeks; confirm the units of these columns.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(X_train['txn_total'], X_train['recency_true']/7,
        X_train['T']/7)
print(bgf)

# IPython magic: this line is only valid inside an IPython/Jupyter session.
%matplotlib inline
from lifetimes.plotting import plot_frequency_recency_matrix

plot_frequency_recency_matrix(bgf)

#%%
from lifetimes.plotting import plot_probability_alive_matrix

f=plot_probability_alive_matrix(bgf)

# Prediction horizon passed to the model, with the same /7 scaling of
# recency_true and T as used for fitting.
# NOTE(review): t=52 presumably means 52 weeks given that scaling — confirm.
t=52
X_train['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, X_train['txn_total'], X_train['recency_true']/7,
    X_train['T']/7)
#%%
from lifetimes.plotting import plot_period_transactions
f = plot_period_transactions(bgf)

#%%
예제 #21
0
 def test_plot_frequency_recency_matrix(self):
     """Plot the matrix with default arguments; return the current figure."""
     plt.figure()
     plotting.plot_frequency_recency_matrix(bgf)
     current_figure = plt.gcf()
     return current_figure