コード例 #1
2
def calc_clv(clv_recs, end, months=12):
    """Compute a per-player customer lifetime value (CLV) table.

    Builds a frequency/recency/T/monetary summary from the raw records,
    fits a BG/NBD model on the purchase pattern and a Gamma-Gamma model on
    the monetary value, then joins the resulting CLV onto the summary.

    Args:
        clv_recs: Records convertible to a DataFrame that contains the
            ``player_id``, ``start_date`` and ``theo_win`` columns.
        end: End of the observation period; it is passed through ``parse``.
        months: Forwarded as ``time`` to the CLV calculation.

    Returns:
        The summary DataFrame inner-joined on its index with a ``clv``
        column. Rows whose CLV is NaN are dropped and negative CLVs are
        replaced by 0.
    """
    wanted_columns = ['player_id', 'start_date', 'theo_win']
    transactions = pandas.DataFrame(clv_recs)[wanted_columns]
    transactions['theo_win'] = transactions['theo_win'].astype(float)

    summary = summary_data_from_transaction_data(
        transactions,
        'player_id',
        'start_date',
        monetary_value_col='theo_win',
        observation_period_end=parse(end),
    )
    freq = summary['frequency']
    rec = summary['recency']
    age = summary['T']
    spend = summary['monetary_value']

    # Transaction-count model.
    purchase_model = BetaGeoFitter(penalizer_coef=0.0)
    purchase_model.fit(freq, rec, age)

    # Monetary-value model.
    spend_model = GammaGammaFitter(penalizer_coef=0)
    spend_model.fit(freq, spend)

    lifetime_value = spend_model.customer_lifetime_value(
        purchase_model,  # predicts the number of future transactions
        freq,
        rec,
        age,
        spend,
        time=months,
        discount_rate=0.0,
    )

    clv_table = pandas.DataFrame(lifetime_value).dropna()
    # Floor negative estimates at zero.
    negative = clv_table['clv'] < 0
    clv_table[negative] = 0.0

    return summary.merge(clv_table, left_index=True, right_index=True,
                         how='inner')
コード例 #2
0
def clv(pareto, mbg, summary):
    """Predict customer lifetime value with two transaction models.

    A Gamma-Gamma model is fitted on the customers with ``frequency > 0``
    only, then used to compute CLV for every row of ``summary`` twice:
    once with ``pareto`` and once with ``mbg`` as the transaction
    prediction model (``time=12``, ``freq="D"``).

    Args:
        pareto: Fitted transaction model used for the first prediction.
        mbg: Fitted transaction model used for the second prediction.
        summary: DataFrame with ``frequency``, ``recency``, ``T`` and
            ``monetary_value`` columns.

    Returns:
        Tuple ``(pred_clv_pareto, pred_clv_mbg)``.
    """
    repeat_buyers = summary.loc[summary['frequency'] > 0]

    spend_model = GammaGammaFitter(penalizer_coef=0.0)
    spend_model.fit(frequency=repeat_buyers['frequency'],
                    monetary_value=repeat_buyers['monetary_value'])

    def _lifetime_value(transaction_model):
        # Same inputs for both models; only the transaction model differs.
        return spend_model.customer_lifetime_value(
            transaction_prediction_model=transaction_model,
            frequency=summary['frequency'],
            recency=summary['recency'],
            T=summary['T'],
            monetary_value=summary['monetary_value'],
            time=12,
            freq="D")

    pareto_clv = _lifetime_value(pareto)
    mbg_clv = _lifetime_value(mbg)
    return pareto_clv, mbg_clv
コード例 #3
0
def create_cltv_pred(dataframe, w=4, m=1):
    """Fit BG/NBD and Gamma-Gamma models and add CLTV prediction columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``frequency``, ``recency_weekly``, ``T_weekly`` and
        ``monetary_avg``. It is not modified; a filtered copy is used.
    w : int
        Number of weeks passed to the BG/NBD ``predict`` call.
    m : int
        Number of months passed as ``time`` to the CLTV calculation.

    Returns
    -------
    pandas.DataFrame
        Rows with ``monetary_avg > 0`` plus the columns
        ``exp_sales_{w}_week``, ``expected_average_profit``,
        ``cltv_p_{m}_month``, ``cltv_p_score`` (scaled to 1-100) and
        ``cltv_p_segment`` (terciles labelled C/B/A from lowest to
        highest). Any ``recency``, ``frequency`` and ``monetary`` columns
        are removed from the result.
    """
    # Take an explicit copy: the original assigned new columns onto a
    # boolean-filtered slice of the caller's frame (chained assignment).
    dataframe = dataframe[dataframe["monetary_avg"] > 0].copy()
    dataframe["frequency"] = dataframe["frequency"].astype(int)

    # BG/NBD: expected number of sales over the next ``w`` weeks.
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(dataframe['frequency'], dataframe['recency_weekly'],
            dataframe['T_weekly'])

    dataframe[f'exp_sales_{w}_week'] = bgf.predict(w, dataframe['frequency'],
                                                   dataframe['recency_weekly'],
                                                   dataframe['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(dataframe['frequency'], dataframe['monetary_avg'])
    dataframe["expected_average_profit"] = (
        ggf.conditional_expected_average_profit(dataframe['frequency'],
                                                dataframe['monetary_avg']))

    # CLTV prediction combining both models.
    cltv_column = f'cltv_p_{m}_month'
    dataframe[cltv_column] = ggf.customer_lifetime_value(
        bgf,
        dataframe['frequency'],
        dataframe['recency_weekly'],
        dataframe['T_weekly'],
        dataframe['monetary_avg'],
        time=m,
        freq="W",
        discount_rate=0.01)

    # Rescale the CLTV to a 1-100 score.
    scaler = MinMaxScaler(feature_range=(1, 100))
    dataframe['cltv_p_score'] = scaler.fit_transform(dataframe[[cltv_column]])

    # Tercile segments on the score.
    dataframe['cltv_p_segment'] = pd.qcut(dataframe['cltv_p_score'],
                                          3,
                                          labels=['C', 'B', 'A'])

    # Drop the raw RFM columns from the output, if present.
    keep = ~dataframe.columns.isin(['recency', 'frequency', 'monetary'])
    return dataframe[dataframe.columns[keep]]
コード例 #4
0
class transactionMonetary(object):
    """CLV workflow on sales orders: summary, Gamma-Gamma fit, and report.

    Intended call order: ``summary_trans_create`` -> ``fit_ggf`` ->
    ``summaryOutput``; each step reads attributes set by the previous one.
    """

    def summary_trans_create(self, df, observation_period_end='2017-02-08'):
        '''
        Subset df on sales data, return trans summary with monetary spend.

        Keeps rows selected by ``subset_data(df, 'OrderType', 1)`` with a
        positive ``OrderTotal`` and summarises them per ``CustomerNo``.

        Args:
            df: Order data with ``OrderType``, ``OrderDate``, ``CustomerNo``
                and ``OrderTotal`` columns.
            observation_period_end: End of the observation window passed to
                ``summary_data_from_transaction_data``. Defaults to the
                previously hard-coded ``'2017-02-08'``.

        Returns:
            The summary rows with ``frequency > 0``. The full summary is
            stored on ``self.summary_monetary`` and the returned subset on
            ``self.return_customers``.
        '''
        sales = subset_data(df, 'OrderType', 1)
        sales = sales[sales.OrderTotal > 0]
        transaction_data_monetary = sales[['OrderDate', 'CustomerNo', 'OrderTotal']]
        self.summary_monetary = summary_data_from_transaction_data(
            transaction_data_monetary,
            'CustomerNo',
            'OrderDate',
            'OrderTotal',
            observation_period_end=observation_period_end)
        # Keep customers with more than one spend.
        self.return_customers = self.summary_monetary[self.summary_monetary['frequency'] > 0]
        return self.return_customers

    def fit_ggf(self):
        '''
        Fit the Gamma-Gamma model on ``self.return_customers`` and store it
        on ``self.ggf``. Requires ``summary_trans_create`` to have run.
        '''
        self.ggf = GammaGammaFitter(penalizer_coef=0)
        self.ggf.fit(self.return_customers['frequency'],
                     self.return_customers['monetary_value'])

    def summaryOutput(self, discount_rate=0.12, months=12):
        '''
        Fit beta geometric model to calculate CLV, and use GG model to
        calculate expected profit per customer.
        Write out CLV and profits to csv, print out averages to screen.

        Requires ``summary_trans_create`` and ``fit_ggf`` to have run.

        Args:
            discount_rate: Annual discount rate. It is converted to a
                monthly rate (``discount_rate / 12``) before being handed
                to ``customer_lifetime_value``.
            months: Prediction horizon passed as ``time``.
        '''
        summary = self.summary_monetary
        beta_model = BetaGeoFitter()
        # Calculate average transaction value.
        summary['avg_transaction_value'] = self.ggf.conditional_expected_average_profit(
            summary['frequency'],
            summary['monetary_value'])
        # Fit beta geo model.
        beta_model.fit(summary['frequency'], summary['recency'], summary['T'])
        # lifetimes applies the discount once per month, so the annual rate
        # is split over 12 months. The previous formula
        # (discount_rate / months / 30) tied the rate to the prediction
        # horizon and shrank it by a further factor of 30.
        disc_rate = discount_rate / 12.0
        summary['clv'] = self.ggf.customer_lifetime_value(
            beta_model,  # the model to use to predict the number of future transactions
            summary['frequency'],
            summary['recency'],
            summary['T'],
            summary['monetary_value'],
            time=months,  # months
            discount_rate=disc_rate  # monthly discount rate
        )
        # Write customer data with calculations.
        # NOTE(review): index=False omits the frame's index from the CSV;
        # presumably that index is the customer number - confirm whether it
        # should be written.
        summary.to_csv("CLV_AVG_transactionValue_perCustomer.csv", index=False)
        # Print summary stats.
        print("Expected conditional average profit: {}, Average profit: {}".format(
            self.ggf.conditional_expected_average_profit(
                summary['frequency'],
                summary['monetary_value']).mean(),
            summary[summary['frequency'] > 0]['monetary_value'].mean()))
コード例 #5
0
def gg_model(rfmmod, bgf, p, f):
    """Fit a Gamma-Gamma model and predict CLTV for returning customers.

    Args:
        rfmmod: DataFrame with ``frequency``, ``recency``, ``T`` and
            ``monetary_value`` columns. It is not modified.
        bgf: Transaction prediction model passed to the CLTV calculation.
        p: Penalizer coefficient for the Gamma-Gamma fitter.
        f: Value passed as ``freq`` to the CLTV calculation.

    Returns:
        A copy of the rows with ``frequency > 0`` and ``monetary_value > 0``
        with ``predicted_cltv`` and ``exp_profit`` columns added, sorted by
        ``predicted_cltv`` descending and rounded to 3 decimals.
    """
    returning = (rfmmod['frequency'] > 0) & (rfmmod['monetary_value'] > 0)
    # Explicit copy: new columns are added below, and assigning onto a
    # filtered slice of the caller's frame is chained assignment.
    ret_cust = rfmmod[returning].copy()

    ggf = GammaGammaFitter(penalizer_coef=p)
    ggf.fit(ret_cust['frequency'], ret_cust['monetary_value'])

    ret_cust['predicted_cltv'] = ggf.customer_lifetime_value(
        bgf,
        ret_cust['frequency'],
        ret_cust['recency'],
        ret_cust['T'],
        ret_cust['monetary_value'],
        time=12,  # months
        freq=f,
        discount_rate=0.01)
    ret_cust['exp_profit'] = ggf.conditional_expected_average_profit(
        ret_cust['frequency'], ret_cust['monetary_value'])

    return ret_cust.sort_values('predicted_cltv', ascending=False).round(3)
コード例 #6
0
# dtype: float64


# Expected average profit per customer from the fitted Gamma-Gamma model
# (`ggf` and `rfm_cltv` are defined earlier in the script).
rfm_cltv["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm_cltv["Frequency"],
                                                                              rfm_cltv["monetary_avg"])

# Show the 20 customers with the highest expected average profit.
rfm_cltv.sort_values("expected_average_profit", ascending=False).head(20)

# ##################################################################
#   CLTV PREDICTION by combining the BG/NBD and GAMMA-GAMMA MODELS
# ##################################################################

# `bgf` is the transaction model fitted earlier in the script.
cltv = ggf.customer_lifetime_value(bgf,
                                   rfm_cltv["Frequency"],
                                   rfm_cltv["Recency_weekly"],
                                   rfm_cltv["T_weekly"],
                                   rfm_cltv["monetary_avg"],
                                   time=3,  # for 3 months
                                   freq="W",  # recency/T are expressed in weeks
                                   discount_rate=0.01)

cltv.shape       # (4338,)
# Move the index into a regular column so the frame can be sorted/displayed.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(10)

# The 10 most valuable customers (sample of the output recorded by the author):

#       Customer ID         clv
# 2678        16000    11794.07113
# 2087        15195     4641.70350
# 715         13298     1140.24615
# 2011        15098      988.92748
コード例 #7
0
# Show the 20 customers with the highest expected average profit.
rfm.sort_values("expected_average_profit", ascending=False).head(20)


# Open question: find the average number of sales in past months?



##############################################################
# 4. Computing CLTV with the BG-NBD and GG models.
##############################################################
# TASK 1
# 6-month CLTV prediction for the 2010-2011 UK customers
# (`ggf`, `bgf` and `rfm` are defined earlier in the script).
cltv = ggf.customer_lifetime_value(bgf,
                                   rfm['frequency'],
                                   rfm['recency_weekly_p'],
                                   rfm['tenure_weekly_p'],
                                   rfm['monetary_avg'],
                                   time=6,
                                   freq="W",
                                   discount_rate=0.01)
cltv.head()
cltv.shape
# Move the index into a regular column so it can be used as a merge key.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(50)
# Attach the predicted CLV to the RFM table.
rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.sort_values(by="clv", ascending=False).head(7)

# Inspect a single customer.
rfm_cltv_final[rfm_cltv_final["Customer ID"] == 12748.00000]
# TASK 2
# 1-month and 12-month CLTV prediction for the 2010-2011 UK customers.
# - Analyze the top 10 customers by 1-month CLTV against the top 10 by 12-month CLTV.
# - Is there a difference? If so, what might explain it?
コード例 #8
0
def generate_clv_table(data, clv_prediction_time=None, model_penalizer=None):
    """Build a per-client table of historical and predicted customer value.

    Steps: drop rows without searches, summarise the activity stream per
    client, fit a BG/NBD model, predict 14-day activity and alive
    probability, fit a Gamma-Gamma model on returning clients with
    positive value, predict CLV, then merge in historical totals.

    Args:
        data: Activity DataFrame with ``client_id``, ``activity_date``,
            ``Searches`` and ``Revenue`` columns. ``activity_date`` values
            must support ``.max().date()``.
        clv_prediction_time: Prediction horizon passed as ``time`` to the
            CLV calculation. ``None`` means 12.
        model_penalizer: Penalizer coefficient of the BG/NBD model.
            ``None`` means 0.

    Returns:
        DataFrame indexed by client with the columns ``frequency``,
        ``recency``, ``customer_age``, ``avg_session_value``,
        ``predicted_searches_14_days``, ``alive_probability``,
        ``predicted_clv_12_months``, ``historical_searches``,
        ``historical_clv``, ``total_clv``, ``days_since_last_active``,
        ``user_status`` and ``calc_date``.
    """
    # Set default values if they are not stated.
    if clv_prediction_time is None:
        clv_prediction_time = 12
    if model_penalizer is None:
        model_penalizer = 0

    # Remove non search sessions.
    data = data[data['Searches'] > 0]

    max_date = data['activity_date'].max()

    # Aggregate the activity stream into frequency / recency / T /
    # monetary_value per client.
    summary = summary_data_from_transaction_data(
        data,
        'client_id',
        'activity_date',
        'Revenue',
        observation_period_end=max_date)

    # Building the model using BG/NBD.
    bgf = BetaGeoFitter(penalizer_coef=model_penalizer)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Expected purchases per client over the next t periods.
    t = 14
    summary['predicted_searches'] = (
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']))

    # Conditional alive probability.
    summary['alive_prob'] = summary.apply(
        lambda row: calc_alive_prob(row, bgf), axis=1)
    summary['alive_prob'] = summary['alive_prob'].astype(float)

    # There cannot be non-positive values in the monetary_value or
    # frequency vector.
    summary_with_value_and_returns = summary[(summary['monetary_value'] > 0)
                                             & (summary['frequency'] > 0)]

    # recency, frequency and T are columns of the same frame, so one length
    # check covers all three.
    has_returning_clients = len(summary_with_value_and_returns) > 0
    if not has_returning_clients:
        logger.debug(data['client_id'])

    # Setting up the Gamma-Gamma model.
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary_with_value_and_returns['frequency'],
            summary_with_value_and_returns['monetary_value'])

    # Refit the BG/NBD model on the returning clients when there are any.
    # The original condition was `not (<generator expression>)`, which is
    # always False, so this refit never ran.
    if has_returning_clients:
        bgf.fit(summary_with_value_and_returns['frequency'],
                summary_with_value_and_returns['recency'],
                summary_with_value_and_returns['T'])

    # Customer lifetime value from the Gamma-Gamma model.
    customer_predicted_value = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        summary_with_value_and_returns['frequency'],
        summary_with_value_and_returns['recency'],
        summary_with_value_and_returns['T'],
        summary_with_value_and_returns['monetary_value'],
        time=clv_prediction_time,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    )

    # Convert to a frame indexed by client_id.
    df_cpv = pd.DataFrame({
        'client_id': customer_predicted_value.index,
        'pred_values': customer_predicted_value.values
    }).set_index('client_id')

    # Merge with the original summary.
    df_merged = pd.merge(summary,
                         df_cpv,
                         left_index=True,
                         right_index=True,
                         how='outer')

    # Historical totals per client. The columns are selected with a list;
    # the tuple form `['Searches', 'Revenue']` is not accepted by current
    # pandas.
    data_hist = data.groupby(['client_id'])[['Searches', 'Revenue']].apply(
        lambda x: x.astype(float).sum())

    df_final = pd.merge(df_merged,
                        data_hist,
                        left_index=True,
                        right_index=True,
                        how='outer')

    # Prevent NaN in the predicted value for clients with no repeat
    # activity (.loc avoids chained assignment).
    df_final.loc[df_final['frequency'] == 0, 'pred_values'] = 0.0

    # Combine historical and predicted customer value.
    df_final['total_clv'] = df_final['pred_values'] + df_final['Revenue']

    # Time since the client was last active.
    df_final['last_active'] = df_final['T'] - df_final['recency']

    # Label users inactive for more than 14 as "Expired", else "Active".
    df_final['user_status'] = np.where(df_final['last_active'] > 14, 'Expired',
                                       'Active')

    # Date of calculation: the max activity date in the data.
    df_final['calc_date'] = max_date.date()

    # Rename by name rather than by position so the mapping stays correct
    # even if the column order changes.
    df_final = df_final.rename(columns={
        'T': 'customer_age',
        'monetary_value': 'avg_session_value',
        'predicted_searches': 'predicted_searches_14_days',
        'alive_prob': 'alive_probability',
        'pred_values': 'predicted_clv_12_months',
        'Searches': 'historical_searches',
        'Revenue': 'historical_clv',
        'last_active': 'days_since_last_active',
    })

    # Prevent non returning customers from having 100% alive probability.
    df_final.loc[df_final['frequency'] == 0, 'alive_probability'] = 0.0

    return df_final
コード例 #9
0
# Preview the expected average profit for the first 10 rows
# (`ggf`, `bgf` and `df` are defined earlier in the script).
ggf.conditional_expected_average_profit(df["FREQUENCY"],
                                        df["MONETARY_VALUE"]).head(10)

# Compare the model's mean expected profit with the observed mean among
# customers that have FREQUENCY > 0.
print("Expected conditional average profit: %s, Average profit: %s" %
      (ggf.conditional_expected_average_profit(df["FREQUENCY"],
                                               df["MONETARY_VALUE"]).mean(),
       df[df["FREQUENCY"] > 0]["MONETARY_VALUE"].mean()))

# Fit the transaction model on the same data.
bgf.fit(df["FREQUENCY"], df["RECENCY"], df["T"])

pred = ggf.customer_lifetime_value(
    bgf,  # the model to use to predict the number of future transactions
    df["FREQUENCY"],
    df["RECENCY"],
    df["T"],
    df["MONETARY_VALUE"],
    time=1,  # NOTE(review): the author labelled this "year"; lifetimes documents `time` in months - confirm the intended horizon
    discount_rate=0.02  # author's note: per-campaign discount rate, ~20% annually - TODO confirm
)

# Quick look at the distribution of the predictions.
pred.head(10)
pred.tail(10)
pred.mean()
pred.median()

# Reference statistics of the inputs.
df["MONETARY_VALUE"].mean()
df["T"].mean()

# Number of customers with T below 14 versus above 13.
df[df["T"] < 14]["T"].count()
df[df["T"] > 13]["T"].count()
コード例 #10
0
ファイル: Main.py プロジェクト: colllz/CLV
# Keep only customers with a positive monetary value
# (`returning_customers_summary`, `ggf` and `bgf` are defined earlier in the script).
returning_customers_summary = returning_customers_summary[returning_customers_summary['monetary_value']>0]

# Expected average sale per customer from the Gamma-Gamma model.
returning_customers_summary['predicted_avg_sales']=ggf.conditional_expected_average_profit(returning_customers_summary['frequency'],returning_customers_summary['monetary_value'])



# Check the expected average value against the actual average value in the data as a sanity check
print(f"Expected Average sales: {returning_customers_summary['predicted_avg_sales'].mean()}")
print(f"Actual Average sales: {returning_customers_summary['monetary_value'].mean()}")
# Author's observation: the values seem to be fine

# Calculate CLV for 1 month
returning_customers_summary['Predicted_CLV'] = ggf.customer_lifetime_value(bgf,
                                                                           returning_customers_summary['frequency'],
                                                                           returning_customers_summary['recency'],
                                                                           returning_customers_summary['T'],
                                                                           returning_customers_summary['monetary_value'],
                                                                           time=1, # lifetime in months
                                                                           freq='D', # time unit in which T is expressed
                                                                           discount_rate=0.01 # discount rate
                                                                           )
# Manual CLV calculation (disabled)
#returning_customers_summary['manual_predict_clv']= returning_customers_summary['predicted_num_purchases'] * returning_customers_summary['predicted_avg_sales']
# Calculate the profit share of the CLV using a fixed 5% margin
profit_margin=0.05
returning_customers_summary['profit_CLV'] =returning_customers_summary['Predicted_CLV'] * profit_margin


############ THE END OF CLV ###################
コード例 #11
0
def create_cltv_p(dataframe):
    """Build a CLTV prediction table per customer from invoice lines.

    Aggregates the invoices per ``Customer ID``, fits a BG/NBD model on
    weekly recency/T and a Gamma-Gamma model on the average monetary
    value, and predicts a 6-month CLTV that is rescaled to 1-100 and cut
    into terciles.

    Args:
        dataframe: Invoice lines with ``Customer ID``, ``InvoiceDate``,
            ``Invoice`` and ``TotalPrice`` columns.

    Returns:
        DataFrame indexed by ``Customer ID`` with the columns
        ``recency_cltv_p``, ``T``, ``monetary_avg``,
        ``recency_weekly_cltv_p``, ``T_weekly``, ``exp_sales_1_month``,
        ``exp_sales_3_month``, ``expected_average_profit``, ``cltv_p``
        (scaled score) and ``cltv_p_segment`` (C/B/A, lowest to highest).
        Only customers with ``monetary_avg > 0`` and more than one invoice
        are kept.
    """
    # Fixed analysis date used as the end of the observation period.
    today_date = dt.datetime(2011, 12, 11)

    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [lambda date: (date.max() - date.min()).days,   # recency: last - first purchase
                        lambda date: (today_date - date.min()).days],  # T: analysis date - first purchase
        'Invoice': lambda num: num.nunique(),                          # frequency
        'TotalPrice': lambda TotalPrice: TotalPrice.sum()})            # monetary
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ["recency_cltv_p", "T", "frequency", "monetary"]

    # The Gamma-Gamma model works on the average value per transaction.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and weekly T for the BG/NBD model.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Monetary average must be positive, and only customers with more than
    # one invoice are modelled (a frequency filter, not a recency filter).
    # .copy() makes the column assignments below operate on an independent
    # frame instead of a filtered slice.
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm["frequency"] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD model.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm["frequency"],
            rfm["recency_weekly_cltv_p"],
            rfm["T_weekly"])

    # Expected sales over 4 weeks (1 month) and 12 weeks (3 months).
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])

    # Gamma-Gamma model: expected average profit.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm["frequency"], rfm["monetary_avg"])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm["frequency"], rfm["monetary_avg"])

    # CLTV prediction for 6 months.
    rfm["cltv_p"] = ggf.customer_lifetime_value(bgf,
                                                rfm["frequency"],
                                                rfm["recency_weekly_cltv_p"],
                                                rfm["T_weekly"],
                                                rfm["monetary_avg"],
                                                time=6,
                                                freq="W",
                                                discount_rate=0.01)

    # Rescale the CLTV to a 1-100 score.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tercile segments on the score.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    return rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
                "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
                "cltv_p", "cltv_p_segment"]]
コード例 #12
0
def create_cltv_p(dataframe):
    """Build a CLTV prediction table per customer from invoice lines.

    Aggregates the invoices per ``Customer ID``, fits a BG/NBD model on
    weekly recency/T and a Gamma-Gamma model on the average monetary
    value, and predicts a 6-month CLTV that is rescaled to 1-100 and cut
    into terciles.

    Args:
        dataframe: Invoice lines with ``Customer ID``, ``InvoiceDate``,
            ``Invoice`` and ``TotalPrice`` columns.

    Returns:
        DataFrame indexed by ``Customer ID`` (cast to int) with the
        columns ``monetary_avg``, ``T``, ``recency_weekly``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` (scaled score) and
        ``cltv_p_segment`` (C/B/A, lowest to highest). Only customers with
        ``monetary_avg > 0`` are kept.
    """
    # Fixed analysis date used as the end of the observation period.
    today_date = dt.datetime(2011, 12, 11)

    # BG/NBD recency is the customer's age at their last purchase
    # (last - first purchase). The previous version used
    # `today_date - date.max()`, the RFM-style "time since last purchase",
    # which feeds the model the wrong quantity.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                        lambda date: (today_date - date.min()).days],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum()})

    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency', 'T', 'frequency', 'monetary']

    # Monetary average: mean TotalPrice per invoice, then the mean of
    # those invoice-level means per customer.
    temp_df = dataframe.groupby(["Customer ID", "Invoice"]).agg({"TotalPrice": ["mean"]})
    temp_df = temp_df.reset_index()
    temp_df.columns = temp_df.columns.droplevel(0)
    temp_df.columns = ["Customer ID", "Invoice", "total_price_mean"]
    temp_df2 = temp_df.groupby(["Customer ID"], as_index=False).agg({"total_price_mean": ["mean"]})
    temp_df2.columns = temp_df2.columns.droplevel(0)
    temp_df2.columns = ["Customer ID", "monetary_avg"]

    rfm = rfm.merge(temp_df2, how="left", on="Customer ID")
    rfm.set_index("Customer ID", inplace=True)
    rfm.index = rfm.index.astype(int)

    # Weekly recency and weekly T for the BG/NBD model.
    rfm["recency_weekly"] = rfm["recency"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Monetary average must be positive. .copy() makes the column
    # assignments below operate on an independent frame instead of a
    # filtered slice.
    rfm = rfm[rfm["monetary_avg"] > 0].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD model.
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly'],
            rfm['T_weekly'])

    # Expected sales over 4 weeks (1 month) and 12 weeks (3 months).
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly'],
                                           rfm['T_weekly'])

    # Gamma-Gamma model: expected average profit.
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # CLTV prediction for 6 months.
    rfm["cltv_p"] = ggf.customer_lifetime_value(bgf,
                                                rfm['frequency'],
                                                rfm['recency_weekly'],
                                                rfm['T_weekly'],
                                                rfm['monetary_avg'],
                                                time=6,
                                                freq="W",
                                                discount_rate=0.01)

    # Rescale the CLTV to a 1-100 score.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tercile segments on the score.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    return rfm[["monetary_avg", "T", "recency_weekly", "T_weekly",
                "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
                "cltv_p", "cltv_p_segment"]]
コード例 #13
0
    rfm['monetary_avg']).sort_values(ascending=False).head(10)

# Expected average profit per customer from the fitted Gamma-Gamma model
# (`ggf`, `bgf` and `rfm` are defined earlier in the script).
rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
    rfm['frequency'], rfm['monetary_avg'])

# Show the 20 customers with the highest expected average profit.
rfm.sort_values("expected_average_profit", ascending=False).head(20)

##############################################################
# 4. Computing CLTV with the BG-NBD and GG models.
##############################################################

cltv = ggf.customer_lifetime_value(
    bgf,
    rfm['frequency'],
    rfm['recency_weekly_p'],
    rfm['T_weekly'],
    rfm['monetary_avg'],
    time=3,  # 3 months
    freq="W",  # time unit of T
    discount_rate=0.01)

cltv.head()

cltv.shape
# Move the index into a regular column so it can be used as a merge key.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(50)
# Attach the predicted CLV to the RFM table.
rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.head()

# What comes next?
# A time-based benchmark using a holdout method is needed.
コード例 #14
0
def create_cltv_p(dataframe):
    """Compute a scored and segmented 6-month CLTV prediction per customer.

    Args:
        dataframe: Invoice lines with ``Customer ID``, ``InvoiceDate``,
            ``Invoice`` and ``TotalPrice`` columns.

    Returns:
        DataFrame indexed by ``Customer ID`` with the columns
        ``recency_cltv_p``, ``T``, ``monetary_avg``,
        ``recency_weekly_cltv_p``, ``T_weekly``, ``exp_sales_1_month``,
        ``exp_sales_3_month``, ``expected_average_profit``, ``cltv_p``
        (CLTV rescaled to 1-100) and ``cltv_p_segment`` (terciles C/B/A,
        lowest to highest). Customers with a non-positive ``monetary_avg``
        or with a single invoice are excluded.
    """
    # Fixed analysis date used as the end of the observation period.
    today_date = dt.datetime(2011, 12, 11)

    # Per-customer recency (last - first purchase), T (analysis date -
    # first purchase), number of invoices and total spend.
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                                                                lambda date: (today_date - date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})

    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified monetary average: total spend divided by invoice count.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and weekly T for the BG/NBD model.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Keep positive monetary averages and customers with more than one
    # invoice (this filters on frequency, not recency). The explicit copy
    # avoids assigning columns onto a filtered slice further down.
    eligible = (rfm["monetary_avg"] > 0) & (rfm['frequency'] > 1)
    rfm = rfm[eligible].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    weekly_inputs = (rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # BG/NBD model.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(*weekly_inputs)

    # Expected sales over 4 weeks (1 month) and 12 weeks (3 months).
    rfm["exp_sales_1_month"] = bgf.predict(4, *weekly_inputs)
    rfm["exp_sales_3_month"] = bgf.predict(12, *weekly_inputs)

    # Gamma-Gamma model: expected average profit.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])

    # 6-month CLTV.
    rfm["cltv_p"] = ggf.customer_lifetime_value(bgf,
                                                rfm['frequency'],
                                                rfm['recency_weekly_cltv_p'],
                                                rfm['T_weekly'],
                                                rfm['monetary_avg'],
                                                time=6,
                                                freq="W",
                                                discount_rate=0.01)

    # Rescale the CLTV to a 1-100 score.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tercile segments on the score.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    output_columns = ["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
                      "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
                      "cltv_p", "cltv_p_segment"]
    return rfm[output_columns]
コード例 #15
0
ファイル: CLTV.py プロジェクト: EkremBayar/myprojects
# Fit the BG/NBD model on frequency, weekly recency and weekly T
# (`bgf` and `rfm` are defined earlier in the script).
bgf.fit(rfm["frequency"], rfm["recency_weekly_p"],
        rfm["T_weekly"])  # BG/NBD fitted.

########################################
# Gamma Gamma
########################################

ggf = GammaGammaFitter(penalizer_coef=0.01)  # Gamma-Gamma model created.
ggf.fit(rfm["frequency"], rfm["monetary_avg"])  # Gamma-Gamma model fitted.

# 6-month CLTV prediction

cltv_6_months = ggf.customer_lifetime_value(bgf,
                                            rfm['frequency'],
                                            rfm['recency_weekly_p'],
                                            rfm['T_weekly'],
                                            rfm['monetary_avg'],
                                            time=6,
                                            freq="W",
                                            discount_rate=0.01)

cltv_6_months = cltv_6_months.reset_index(
)  # move the index into a regular column (author's note: the index was "broken" before this)
# Display customers ordered by predicted CLV, highest first.
cltv_6_months.sort_values(by="clv", ascending=False)

# 1 Month CLTV Prediction

cltv_1_month = ggf.customer_lifetime_value(bgf,
                                           rfm['frequency'],
                                           rfm['recency_weekly_p'],
                                           rfm['T_weekly'],
                                           rfm['monetary_avg'],
コード例 #16
0
def create_cltv_p(dataframe):
    """Return a per-customer table of BG/NBD and Gamma-Gamma CLTV predictions.

    Args:
        dataframe: Transaction rows with the columns 'Customer ID',
            'InvoiceDate', 'Invoice' and 'TotalPrice'. 'InvoiceDate' has to
            be datetime-like because day differences are taken from it.

    Returns:
        DataFrame indexed by 'Customer ID' holding recency and tenure (in
        days and in weeks), the average monetary value per invoice, the
        expected number of sales over the next 4 and 12 weeks, the expected
        average profit, the 6-month CLTV rescaled to the range 1-100
        ('cltv_p') and its tertile label ('cltv_p_segment', "C" lowest to
        "A" highest). Only customers with a positive average monetary value
        and more than one invoice are kept.
    """
    # Fixed analysis date against which customer tenure (T) is measured.
    today_date = dt.datetime(2011, 12, 11)

    # RFM metrics plus tenure: recency (first to last purchase, in days),
    # T (first purchase to analysis date, in days), number of distinct
    # invoices and total spend.
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                                                                lambda date: (today_date - date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})
    # Assigning flat names replaces the two-level header produced by agg, so
    # no separate droplevel step is needed.
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified monetary_avg: total spend divided by the number of invoices.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and tenure for the BG/NBD model.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Sanity filters. The explicit copy makes the column assignments below
    # operate on an independent frame rather than on a slice of the
    # unfiltered one (avoids SettingWithCopyWarning).
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm["frequency"] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly_cltv_p'],
            rfm['T_weekly'])

    # exp_sales_1_month: expected purchases in the next 4 weeks.
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # exp_sales_3_month: expected purchases in the next 12 weeks.
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])
    # 6-month cltv_p (freq="W": recency and T are given in weeks).
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # Rescale cltv_p to the range 1-100.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment: three equal-sized groups, "C" lowest and "A" highest.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # Keep only the reported columns (frequency is intentionally not among them).
    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm
コード例 #17
0
def create_cltv_p(dataframe):
    """Compute per-customer CLTV predictions from raw transaction rows.

    Args:
        dataframe: Transactions containing the columns 'Customer ID',
            'InvoiceDate' (datetime-like), 'Invoice' and 'TotalPrice'.

    Returns:
        DataFrame indexed by 'Customer ID' with recency and tenure in days
        and weeks, the average spend per invoice, expected sales for the
        next 4 and 12 weeks, the expected average profit, the 6-month CLTV
        rescaled to 1-100 ('cltv_p') and a tertile label in
        'cltv_p_segment' ("C" lowest, "A" highest). Customers with a
        non-positive average spend or with a single invoice are excluded.
    """
    # Fixed analysis date used to measure customer tenure.
    today_date = dt.datetime(2011, 12, 11)

    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            # recency: days between the first and the last purchase
            lambda date: (date.max() - date.min()).days,
            # T: days between the first purchase and the analysis date
            lambda date: (today_date - date.min()).days,
        ],
        'Invoice':
        lambda num: num.nunique(),
        'TotalPrice':
        lambda price: price.sum()
    })

    # Assigning flat names replaces the two-level header produced by agg; a
    # preceding droplevel would be overwritten immediately, so it is omitted.
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Average spend per invoice.
    rfm['monetary'] = rfm['monetary'] / rfm['frequency']
    rfm.rename(columns={'monetary': 'monetary_avg'}, inplace=True)

    # The models below are fed recency and tenure in weeks.
    rfm["recency_weekly_cltv_p"] = rfm['recency_cltv_p'] / 7
    rfm['T_weekly'] = rfm['T'] / 7

    # Filter, then copy so the assignments below write to an independent
    # frame instead of a slice of the unfiltered one (avoids
    # SettingWithCopyWarning).
    rfm = rfm[(rfm['monetary_avg'] > 0) & (rfm['frequency'] > 1)].copy()
    rfm['frequency'] = rfm['frequency'].astype(int)

    # BG/NBD: expected number of purchases.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    rfm["exp_sales_1_month"] = bgf.predict(4, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV; freq='W' states that recency and T are in weeks.
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq='W',
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Rescale the CLTV to the range 1-100.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Three equal-sized groups: "C" is the lowest tertile, "A" the highest.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]

    return rfm
コード例 #18
0
# GAMMA-GAMMA
#####
# Fit the Gamma-Gamma model on purchase frequency and average monetary value.
ggf = GammaGammaFitter (penalizer_coef=0.01)
ggf.fit (cltv["frequency"], cltv["monetary_avg"])

# Expected average profit per customer according to the fitted model.
cltv["expected_average_profit"] = ggf.conditional_expected_average_profit (cltv["frequency"], cltv["monetary_avg"])

# Top rows by expected average profit; the sorted result is not stored.
cltv.sort_values (by="expected_average_profit", ascending=False).head ()

###########
# 4. CLTV calculation with BG-NBD and GG models
###########
# bgf is the transaction model defined earlier in the script. time=6 is the
# horizon in months, freq="W" states that recency and T are in weeks, and
# discount_rate=0.01 is the monthly discount rate.
cltv["cltv_six_months"] = ggf.customer_lifetime_value (bgf,
                                                       cltv["frequency"],
                                                       cltv["recency_weekly"],
                                                       cltv["T_weekly"],
                                                       cltv["monetary_avg"],
                                                       time=6,
                                                       discount_rate=0.01,
                                                       freq="W")

### Best 5 customers for expected CLTV for 6 months
cltv.sort_values (by="cltv_six_months", ascending=False).head ()

cltv.describe ()

# Selected quantiles of every column in cltv; the index of plot_cltv is the
# quantile level.
plot_cltv = cltv.quantile ([0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1])

import seaborn as sns
import matplotlib.pyplot as plt

# Quantile level on the x axis against the 6-month CLTV at that quantile.
sns.scatterplot (x=plot_cltv.index, y=plot_cltv["cltv_six_months"], data=plot_cltv)
コード例 #19
0
# Gamma-Gamma model of the average order value; verbose=True is passed
# through to fit.
gg = GammaGammaFitter(penalizer_coef=0.001)
gg.fit(customer_detail['frequency'],
       customer_detail['avg_order_value'],
       verbose=True)

# Preview of the model's expected average profit for the first 10 customers.
print(
    gg.conditional_expected_average_profit(
        customer_detail['frequency'],
        customer_detail['avg_order_value']).head(10))

# In[16]:

# Customer lifetime value over t periods without discounting. mbgnbd and t
# are defined earlier in the script (presumably a fitted transaction model
# and the horizon in months -- confirm). astype(int) truncates the value to
# a whole number.
customer_detail['clv'] = gg.customer_lifetime_value(
    mbgnbd,
    customer_detail['frequency'],
    customer_detail['recency'],
    customer_detail['T'],
    customer_detail['avg_order_value'],
    time=t,
    discount_rate=0).astype(int)
customer_detail[[
    'frequency', 'pred_90d_bgf', 'monetary', 'avg_order_value', 'clv'
]].head()

# In[17]:

# Expected number of orders: CLV divided by the expected average profit per
# order, truncated to an integer.
# NOTE(review): astype(int) raises if the ratio contains NaN or infinite
# values -- confirm the inputs rule that out.
customer_detail['exp_orders'] = (
    customer_detail['clv'] / gg.conditional_expected_average_profit(
        customer_detail['frequency'],
        customer_detail['avg_order_value'])).astype(int)
# potential = 100 - 100 * monetary / clv.
# NOTE(review): clv was truncated to an integer above, so it can be 0, in
# which case this division is not finite -- confirm how such rows should be
# handled.
customer_detail['potential'] = 100 - (
    (100 / customer_detail['clv']) * customer_detail['monetary'])
コード例 #20
0
ファイル: lifetimes_clv.py プロジェクト: zdhiman/bundles
# Preview of the with_frequency subset (defined earlier in the script).
with_frequency.head()

# Correlation between monetary value and frequency in that subset.
with_frequency[['monetary_value', 'frequency']].corr()

from lifetimes import GammaGammaFitter

# Gamma-Gamma model of the average transaction value, fitted on
# with_frequency only.
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(with_frequency['frequency'], with_frequency['monetary_value'])

# Bare expression: the fitted model is not used further on this line.
ggf

# The model fitted on with_frequency is applied to the rows of data.
ggf.conditional_expected_average_profit(data['frequency'],
                                        data['monetary_value']).head(20)

# Compares the model's mean expected profit over data with the observed mean
# monetary value of the rows whose frequency is above zero. The formatted
# string is not printed or stored here.
"Expected conditional average profit: %s, Average profit: %s" % (
    ggf.conditional_expected_average_profit(data['frequency'],
                                            data['monetary_value']).mean(),
    data[data['frequency'] > 0]['monetary_value'].mean())

# Refit the transaction model (bgf is created earlier in the script) on data.
bgf.fit(data['frequency'], data['recency'], data['T'])

ggf.customer_lifetime_value(
    bgf,  #the model to use to predict the number of future transactions
    data['frequency'],
    data['recency'],
    data['T'],
    data['monetary_value'],
    time=2,  # months
    discount_rate=0.1  # monthly discount rate
).head(10)
コード例 #21
0
def create_cltv_p(dataframe):
    """Derive BG/NBD and Gamma-Gamma based CLTV features for every customer.

    Args:
        dataframe: Transaction rows with the columns 'Customer ID',
            'InvoiceDate' (datetime-like), 'Invoice' and 'TotalPrice'.

    Returns:
        DataFrame indexed by 'Customer ID' with the columns
        'recency_cltv_p', 'T', 'monetary_avg', 'recency_weekly_cltv_p',
        'T_weekly', 'exp_sales_1_month', 'exp_sales_3_month',
        'expected_average_profit', 'cltv_p' (6-month CLTV rescaled to
        1-100) and 'cltv_p_segment' (tertile label, "C" lowest to "A"
        highest). Customers with a non-positive average monetary value or
        with only one invoice are dropped.
    """
    # Fixed analysis date against which tenure is measured.
    today_date = dt.datetime(2011, 12, 11)

    # Recency here is customer-specific: the span between a customer's first
    # and last purchase, not the distance to the analysis date.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,
            lambda date: (today_date - date.min()).days,
        ],
        'Invoice':
        lambda num: num.nunique(),
        'TotalPrice':
        lambda TotalPrice: TotalPrice.sum()
    })

    # Assigning flat names replaces the two-level header produced by agg; a
    # preceding droplevel would be overwritten immediately, so it is omitted.
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified monetary_avg: total spend divided by the number of invoices.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]

    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and weekly T for the BG/NBD model.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Sanity check on the monetary value, plus a frequency filter (for a
    # more reliable cltv_p estimate). The explicit copy lets the assignments
    # below write to an independent frame rather than to a slice of the
    # unfiltered one (avoids SettingWithCopyWarning).
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm["frequency"] > 1)].copy()

    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # exp_sales_1_month: expected purchases in the next 4 weeks.
    rfm["exp_sales_1_month"] = bgf.predict(4, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # exp_sales_3_month: expected purchases in the next 12 weeks.
    rfm["exp_sales_3_month"] = bgf.predict(12, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])
    # 6-month cltv_p (freq="W": recency and T are given in weeks).
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Rescale cltv_p to the range 1-100.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment: three equal-sized groups, "C" lowest and "A" highest.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # Keep only the reported columns (frequency is not among them).
    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]

    return rfm
コード例 #22
0
ファイル: online_retail_cltv.py プロジェクト: simgeerek/CLTV
# Add the predicted monetary value to combined_data
combined_data["monetary_value_predict"] = monetary_pred
combined_data.head()

##############################################################
# CLV MODEL
##############################################################
# This model takes the expected purchase prediction and combines it with the expected purchase value.
# It yields an estimate of how valuable a customer will be over a given period.

clv = ggf.customer_lifetime_value(
    bgf,  #the model to use to predict the number of future transactions
    combined_data['frequency_cal'],
    combined_data['recency_cal'],
    combined_data['T_cal'],
    combined_data['monetary_value_cal'],
    time=4,  # months
    freq="D",  # time unit in which T is expressed
    discount_rate=0.01)

clv.head()
combined_data["CLV"] = clv
combined_data.head(20)

# These are the 10 most valuable customers for the next 4 months
combined_data.sort_values('CLV', ascending=False).head(10)

# How do we evaluate the performance of the CLV model?
# We can compare it against a simple baseline.
# As the target, let's select the top 20% of customers.
コード例 #23
0
ファイル: CLV.py プロジェクト: lizzzfang/Customer-Analytics
# Gamma-Gamma model of the average transaction value, fitted on returning
# customers only (returning_customers_summary is built earlier in the script).
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)

# estimate the average transaction value
print(
    ggf.conditional_expected_average_profit(data['frequency'],
                                            data['monetary_value']).head(10))

# refit the BG model on the full data summary (frequency, recency, T)
bgf.fit(data['frequency'], data['recency'], data['T'])

# 12-month customer lifetime value for every row of data.
CLV_12M = ggf.customer_lifetime_value(
    bgf,  # the model to use to predict the number of future transactions
    data['frequency'],
    data['recency'],
    data['T'],
    data['monetary_value'],
    time=12,  # months
    discount_rate=0.01  # monthly discount rate ~ 12.7% annually
)

# Turn the result into a two-column frame: the index values of CLV_12M and
# the predicted values.
# NOTE(review): customer_id is a bare name here, not a string literal, so it
# must be a variable defined earlier in the script; if a column literally
# called 'customer_id' was intended, the key needs quotes -- confirm.
CLV_12M = pd.DataFrame({
    customer_id: CLV_12M.index,
    'CLV_12_months': CLV_12M.values
})
print(CLV_12M.head(10))
# Written to the current working directory without the row index.
CLV_12M.to_csv('CLV.csv', index=False)
コード例 #24
0
# Gamma-Gamma model of the average transaction value, fitted on summary_ggf
# without a penalizer.
ggf = GammaGammaFitter(penalizer_coef=0.0)
ggf.fit(summary_ggf['frequency'], summary_ggf['monetary_value'])

# Preview of the expected average profit for the first 10 customers.
ggf.conditional_expected_average_profit(summary_ggf['frequency'],
                                        summary_ggf['monetary_value']).head(10)

# Refit the transaction model (bgf is created earlier in the script) on the
# same summary. One fit is enough: a second call with identical arguments
# only repeats the optimisation.
bgf.fit(summary_ggf['frequency'], summary_ggf['recency'], summary_ggf['T'])

# 12-month customer lifetime value, computed once and kept for later use.
ggf_CLV = ggf.customer_lifetime_value(
    bgf,  #the model to use to predict the number of future transactions
    summary_ggf['frequency'],
    summary_ggf['recency'],
    summary_ggf['T'],
    summary_ggf['monetary_value'],
    time=12,  # months
    discount_rate=0.01  # monthly discount rate ~ 12.7% annually
)

# Preview of the first 10 values, taken from the stored result instead of
# evaluating the identical call a second time.
ggf_CLV.head(10)
コード例 #25
0
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    """Fit CLV models on transaction data and upload the results to GCS.

    Runs the query found in ``context['templates_dict']['query']`` against
    Oracle, fits a BG/NBD model on a calibration window, saves four
    diagnostic charts as SVG files, computes a per-customer lifetime value
    with a Gamma-Gamma model and uploads the resulting CSV together with the
    four charts to Google Cloud Storage.

    Args:
        oracle_conn_id: Connection id handed to ``OracleHook``.
        src_client_id: Client identifier; part of every local file name and
            the first path component of every uploaded object.
        storage_bucket: Name of the destination bucket.
        ds: Not used; the date stamp is read from ``context['ds_nodash']``.
        **context: Must provide ``templates_dict['query']`` and
            ``ds_nodash`` (presumably the Airflow task context -- confirm).
            The query result needs the columns ``src_user_id``,
            ``pickup_date`` and ``price_total`` (matched after lowercasing).

    Returns:
        None. The CSV and the SVG files are written to the current working
        directory and uploaded as
        ``<src_client_id>/<ds_nodash>/<file name>``.
    """
    import matplotlib.pyplot
    # Turn interactive plotting off before any chart is drawn.
    matplotlib.pyplot.ioff()
    ##
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd
    # NOTE(review): gcs is not referenced below; the import is kept in case
    # it is relied on for import-time effects -- confirm and remove.
    import datalab.storage as gcs
    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    print(src_client_id, context)
    query = context['templates_dict']['query']
    data = pd.read_sql(query, con=conn)
    # Lowercase the column names so the lookups below match regardless of
    # the case in which the database returns them.
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values#
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')
    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    # Matrix charts. All local file names share the prefix
    # "<ds_nodash><src_client_id>".
    ds_nodash = context.get("ds_nodash")
    chart_prefix = ds_nodash + str(src_client_id)
    plot_period_transactions_chart = (
        chart_prefix + '_plot_period_transactions_chart.svg')
    plot_frequency_recency_chart = (
        chart_prefix + '_plot_frequency_recency_matrix.svg')
    plot_probability_chart = (
        chart_prefix + '_plot_probability_alive_matrix.svg')
    plot_calibration_vs_holdout_chart = (
        chart_prefix + '_plot_calibration_vs_holdout_purchases.svg')

    ax0 = plot_period_transactions(bgf, max_frequency=30)
    ax0.figure.savefig(plot_period_transactions_chart, format='svg')
    ax1 = plot_frequency_recency_matrix(bgf)
    ax1.figure.savefig(plot_frequency_recency_chart, format='svg')
    ax2 = plot_probability_alive_matrix(bgf)
    ax2.figure.savefig(plot_probability_chart, format='svg')
    ax3 = plot_calibration_purchases_vs_holdout_purchases(bgf,
                                                          training_rfm,
                                                          n=50)
    ax3.figure.savefig(plot_calibration_vs_holdout_chart, format='svg')

    # RFM summary over the whole data set (no holdout split).
    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')
    # The Gamma-Gamma model is fitted on customers with repeat purchases only.
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  #the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)
    full_rfm_with_value = full_rfm.join(clv)

    full_rfm_file = ds_nodash + "-src_client_id-" + str(
        src_client_id) + '-icabbi-test.csv'
    full_rfm_with_value.to_csv(full_rfm_file)

    # Upload every artefact under its own object name. Each local file is
    # passed as the upload source; previously the CSV was sent under all
    # four chart names and the SVG files never reached the bucket.
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default')
    object_prefix = str(src_client_id) + "/" + ds_nodash + "/"
    for local_file in (full_rfm_file,
                       plot_period_transactions_chart,
                       plot_frequency_recency_chart,
                       plot_probability_chart,
                       plot_calibration_vs_holdout_chart):
        gcs_hook.upload(bucket=storage_bucket,
                        object=object_prefix + local_file,
                        filename=local_file)
コード例 #26
0
    returning_customers_summary['frequency'],
    returning_customers_summary['monetary_value'])
# Wrap the average-profit result computed above in a pandas Series.
AVG_Profit = pd.Series(AVG_Profit)

############################### Customer Lifetime Value Calculation ##########
# Fit a fresh BG model on returning_customers_summary; it is the model used
# to predict the number of future transactions.
from lifetimes import BetaGeoFitter
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['recency'],
        returning_customers_summary['T'])

# 12-month customer lifetime value; freq='D' states that recency and T are
# expressed in days. ggf is the Gamma-Gamma model fitted earlier in the script.
CLV_1Year = ggf.customer_lifetime_value(
    bgf,
    returning_customers_summary['frequency'],
    returning_customers_summary['recency'],
    returning_customers_summary['T'],
    returning_customers_summary['monetary_value'],
    time=12,
    freq='D')
CLV_1Year = pd.Series(CLV_1Year)

################# Churn Probability ###############################
# The model gives the probability that a customer is still alive; the churn
# probability (the customer has left) is derived below as 1 - alive.
alive = bgf.conditional_probability_alive(
    returning_customers_summary['frequency'],
    returning_customers_summary['recency'], returning_customers_summary['T'])

################ Final Output ###############################
# Work on a copy so returning_customers_summary itself stays unchanged.
returning_customers_summary2 = returning_customers_summary.copy()
returning_customers_summary2['Churn_Probability'] = 1 - alive
returning_customers_summary2['AVG_SALE'] = AVG_Profit