class transactionMonetary(object):
    """Customer-lifetime-value workflow built on a monetary transaction summary.

    The methods are meant to be called in order:
    ``summary_trans_create`` -> ``fit_ggf`` -> ``summaryOutput``.
    Each step stores its result on the instance for the next one.
    """

    # Used to turn an annual discount rate into the monthly rate that
    # ``customer_lifetime_value`` expects.
    _MONTHS_PER_YEAR = 12

    @staticmethod
    def _monthly_discount_rate(annual_rate):
        """Return the monthly discount rate for a nominal annual rate."""
        return annual_rate / transactionMonetary._MONTHS_PER_YEAR

    def summary_trans_create(self, df):
        """Subset ``df`` on sales data and build the monetary summary.

        Keeps rows with ``OrderType == 1`` and a positive ``OrderTotal``,
        summarises them per ``CustomerNo`` and stores the result in
        ``self.summary_monetary``.

        Returns
        -------
        The summary rows with ``frequency > 0`` (also stored in
        ``self.return_customers``).
        """
        sales = subset_data(df, 'OrderType', 1)
        sales = sales[sales.OrderTotal > 0]
        transaction_data_monetary = sales[
            ['OrderDate', 'CustomerNo', 'OrderTotal']]
        # NOTE(review): the observation end date is hard-coded.
        self.summary_monetary = summary_data_from_transaction_data(
            transaction_data_monetary,
            'CustomerNo',
            'OrderDate',
            'OrderTotal',
            observation_period_end='2017-02-08')
        # keep customers with more than one spend
        self.return_customers = self.summary_monetary[
            self.summary_monetary['frequency'] > 0]
        return self.return_customers

    def fit_ggf(self):
        """Fit the Gamma-Gamma model on the returning customers.

        Requires ``summary_trans_create`` to have been called first.
        """
        self.ggf = GammaGammaFitter(penalizer_coef=0)
        self.ggf.fit(self.return_customers['frequency'],
                     self.return_customers['monetary_value'])

    def summaryOutput(self, discount_rate=0.12, months=12):
        """Compute per-customer CLV and expected profit, then report them.

        Fits a beta-geometric model for future transaction counts and uses
        the already fitted Gamma-Gamma model for the expected profit per
        customer. Writes the enriched summary to
        ``CLV_AVG_transactionValue_perCustomer.csv`` and prints the averages.

        Parameters
        ----------
        discount_rate : float
            Nominal annual discount rate (0.12 -> 1% per month).
        months : int
            CLV horizon in months.
        """
        summary = self.summary_monetary
        beta_model = BetaGeoFitter()

        # calculate average transaction value
        summary['avg_transaction_value'] = (
            self.ggf.conditional_expected_average_profit(
                summary['frequency'], summary['monetary_value']))

        # fit beta geo model
        beta_model.fit(summary['frequency'], summary['recency'], summary['T'])

        # ``customer_lifetime_value`` takes a *monthly* discount rate. The
        # previous ``discount_rate/months/30`` produced a daily rate and so
        # barely discounted future profit at all.
        disc_rate = self._monthly_discount_rate(discount_rate)
        summary['clv'] = self.ggf.customer_lifetime_value(
            beta_model,  # predicts the number of future transactions
            summary['frequency'],
            summary['recency'],
            summary['T'],
            summary['monetary_value'],
            time=months,  # months
            discount_rate=disc_rate  # monthly discount rate
        )

        # write customer data with calculations
        # NOTE(review): index=False drops the index from the file; if the
        # index holds the customer number it is lost - confirm this is wanted.
        summary.to_csv("CLV_AVG_transactionValue_perCustomer.csv", index=False)

        # print summary stats; the expected profit was already computed above
        print("Expected conditional average profit: {}, Average profit: {}".format(
            summary['avg_transaction_value'].mean(),
            summary[summary['frequency'] > 0]['monetary_value'].mean()))
def create_cltv_pred(dataframe, w=4, m=1):
    """Fit BG/NBD and Gamma-Gamma models and predict CLTV.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Per-customer table with the columns ``frequency``,
        ``recency_weekly``, ``T_weekly`` and ``monetary_avg``.
    w : int
        Number of weeks for the BG/NBD expected-sales prediction.
    m : int
        Number of months for the CLTV horizon.

    Returns
    -------
    pandas.DataFrame
        The customers with ``monetary_avg > 0`` plus the prediction columns
        ``exp_sales_{w}_week``, ``expected_average_profit``,
        ``cltv_p_{m}_month``, ``cltv_p_score`` and ``cltv_p_segment``; the
        columns ``recency``, ``frequency`` and ``monetary`` are removed.
    """
    # Work on an explicit copy: the original assigned new columns onto a
    # filtered slice (chained assignment / SettingWithCopyWarning).
    dataframe = dataframe[dataframe["monetary_avg"] > 0].copy()
    dataframe["frequency"] = dataframe["frequency"].astype(int)

    # BG/NBD: expected number of sales in the next ``w`` weeks
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(dataframe['frequency'],
            dataframe['recency_weekly'],
            dataframe['T_weekly'])
    dataframe[f'exp_sales_{w}_week'] = bgf.predict(w,
                                                   dataframe['frequency'],
                                                   dataframe['recency_weekly'],
                                                   dataframe['T_weekly'])

    # Gamma-Gamma: expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(dataframe['frequency'], dataframe['monetary_avg'])
    dataframe["expected_average_profit"] = (
        ggf.conditional_expected_average_profit(dataframe['frequency'],
                                                dataframe['monetary_avg']))

    # CLTV prediction over ``m`` months; recency and T are in weeks
    cltv = ggf.customer_lifetime_value(bgf,
                                       dataframe['frequency'],
                                       dataframe['recency_weekly'],
                                       dataframe['T_weekly'],
                                       dataframe['monetary_avg'],
                                       time=m,
                                       freq="W",
                                       discount_rate=0.01)
    dataframe[f'cltv_p_{m}_month'] = cltv

    # Rescale the CLTV to a 1-100 score
    scaler = MinMaxScaler(feature_range=(1, 100))
    dataframe['cltv_p_score'] = scaler.fit_transform(
        dataframe[[f'cltv_p_{m}_month']])

    # Three quantile-based segments: C is the lowest third, A the highest
    dataframe['cltv_p_segment'] = pd.qcut(dataframe['cltv_p_score'],
                                          3,
                                          labels=['C', 'B', 'A'])

    # Drop the raw RFM inputs when they are present
    return dataframe.drop(columns=['recency', 'frequency', 'monetary'],
                          errors='ignore')
def predictSpending(customerId):
    """Predict the expected average transaction amount for one customer.

    Reads ``sample_transactions.csv``, summarises it per customer up to
    today, loads the Gamma-Gamma model saved in ``ggf.pkl`` and returns the
    prediction as a JSON response.

    Parameters
    ----------
    customerId
        Identifier looked up in the index of the transaction summary.

    Returns
    -------
    A ``flask.jsonify`` response with ``{"success", "result": {"customerId",
    "y"}}``. ``success`` is False when no customer ID was supplied or when
    the customer has no repeat purchase in the data.
    """
    # failure payload; replaced only when a prediction is produced
    data = {"success": False, "result": {"customerId": "", "y": 0.0}}

    # ensure the customer ID was properly uploaded to our endpoint
    if customerId:
        print("* get data")
        # Use a separate name so the response dict is not shadowed by the
        # transactions table (the original reused ``data`` for both).
        transactions = pandas.read_csv("sample_transactions.csv")
        # Alternative source kept from the original:
        #   pandas.read_json(baseURL + "/api/transactions").drop(columns="_id")

        print("* prepare data")
        # Summary columns:
        #   frequency      : number of repeat purchase transactions
        #   recency        : days between first and latest purchase
        #   T              : days between first purchase and period end
        #   monetary_value : average transaction amount
        today = pandas.to_datetime(datetime.date.today())
        summaryData = summary_data_from_transaction_data(
            transactions,
            "customerId",
            "transactionDate",
            monetary_value_col="transactionAmount",
            observation_period_end=today)

        # keep only customers with at least one repeat purchase
        analysisData = summaryData[summaryData["frequency"] > 0]

        try:
            # get the stats of the particular customer
            customer = analysisData.loc[customerId]
        except KeyError:
            # Unknown customer or no repeat purchase: answer with the
            # failure payload instead of an unhandled exception.
            return flask.jsonify(data)

        # load model
        ggf_loaded = GammaGammaFitter()
        ggf_loaded.load_model('ggf.pkl')

        # estimate the average transaction amount
        predict = ggf_loaded.conditional_expected_average_profit(
            customer["frequency"], customer['monetary_value'])

        # add the input and predicted output to the return data
        data = {
            "success": True,
            "result": {
                "customerId": customerId,
                "y": predict
            }
        }

    # return the data dictionary as a JSON response
    return flask.jsonify(data)
def gg_model(rfmmod, bgf, p, f):
    """Fit a Gamma-Gamma model and predict 12-month CLTV.

    Parameters
    ----------
    rfmmod : pandas.DataFrame
        Per-customer summary with ``frequency``, ``recency``, ``T`` and
        ``monetary_value`` columns.
    bgf
        Model passed to ``customer_lifetime_value`` to predict the number of
        future transactions.
    p : float
        Penalizer coefficient for the Gamma-Gamma fit.
    f : str
        ``freq`` argument forwarded to ``customer_lifetime_value``.

    Returns
    -------
    pandas.DataFrame
        Customers with positive frequency and monetary value, extended with
        ``predicted_cltv`` and ``exp_profit``, sorted by ``predicted_cltv``
        in descending order and rounded to 3 decimals.
    """
    # Explicit copy: new columns are added below, and assigning onto a
    # filtered slice triggers pandas' chained-assignment warning.
    keep = (rfmmod['frequency'] > 0) & (rfmmod['monetary_value'] > 0)
    ret_cust = rfmmod[keep].copy()

    # Build the model
    ggf = GammaGammaFitter(penalizer_coef=p)
    ggf.fit(ret_cust['frequency'], ret_cust['monetary_value'])

    pred_clt = ggf.customer_lifetime_value(
        bgf,
        ret_cust['frequency'],
        ret_cust['recency'],
        ret_cust['T'],
        ret_cust['monetary_value'],
        time=12,  # months
        freq=f,
        discount_rate=0.01)
    ret_cust['predicted_cltv'] = pred_clt
    ret_cust['exp_profit'] = ggf.conditional_expected_average_profit(
        ret_cust['frequency'], ret_cust['monetary_value'])

    return ret_cust.sort_values('predicted_cltv', ascending=False).round(3)
plt.show()

# ------------------------------------------------------------------
# Gamma-Gamma model: expected average profit per customer
# ------------------------------------------------------------------
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(frequency=rfm_cltv["Frequency"],
        monetary_value=rfm_cltv["monetary_avg"])
# Recorded from the original run:
# <lifetimes.GammaGammaFitter: fitted with 4338 subjects, p: 3.54, q: 1.00, v: 3.25>

# The ten customers with the highest expected average profit. The result is
# not stored; it is only shown when run interactively.
ggf.conditional_expected_average_profit(
    frequency=rfm_cltv["Frequency"],
    monetary_value=rfm_cltv["monetary_avg"],
).sort_values(ascending=False).head(10)
# Recorded output:
# Customer ID
# 16000    1188.52359
# 16532    1123.69478
# 15749     970.89486
# 15098     853.82762
# 15195     824.67825
# 18102     634.92178
# 13270     593.64789
# 18080     568.96565
# 17291     554.42152
# 16698     530.14009
# dtype: float64
def create_cltv_p(dataframe):
    """Predict 6-month CLTV per customer and assign a segment.

    Aggregates invoice lines per customer, fits a BG/NBD model (expected
    number of sales) and a Gamma-Gamma model (expected average profit),
    combines them into a CLTV, rescales it to 1-100 and splits it into
    three quantile-based segments.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Invoice lines with the columns ``'Customer ID'``, ``'InvoiceDate'``
        (datetime-like), ``'Invoice'`` and ``'TotalPrice'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'Customer ID'`` with the columns ``recency_cltv_p``,
        ``T``, ``monetary_avg``, ``recency_weekly_cltv_p``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` and ``cltv_p_segment``.
        Only customers with more than one invoice and a positive average
        monetary value are kept.
    """
    # fixed analysis date used to compute the tenure T
    today_date = dt.datetime(2011, 12, 11)

    # RFM metrics plus tenure
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                        lambda date: (today_date - date.min()).days],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum()})
    # The original called droplevel(0) without using its result, so the
    # call had no effect; assign it like the other variants do.
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # simplified monetary_avg: total spend divided by number of invoices
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # weekly recency and tenure for BG/NBD
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Sanity filters. Take an explicit copy because columns are assigned
    # below; assigning onto a filtered slice is chained assignment.
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm['frequency'] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # expected sales in 4 weeks (1 month) and 12 weeks (3 months)
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # rescale CLTV to 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # segments: C is the lowest third, A the highest
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
               "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
               "expected_average_profit", "cltv_p", "cltv_p_segment"]]
    return rfm
plt.show() ### Gamma-Gamma model### returning_customers_summary = data[data['frequency'] > 0] returning_customers_summary[[ 'monetary_value', 'frequency' ]].corr() # Correlation between monetary value and the purchase frequency. ggf = GammaGammaFitter(penalizer_coef=0) ggf.fit(returning_customers_summary['frequency'], returning_customers_summary['monetary_value']) print(ggf) # estimate the average transaction value print( ggf.conditional_expected_average_profit(data['frequency'], data['monetary_value']).head(10)) # refit the BG model to the summary_with_money_value dataset bgf.fit(data['frequency'], data['recency'], data['T']) CLV_12M = ggf.customer_lifetime_value( bgf, # the model to use to predict the number of future transactions data['frequency'], data['recency'], data['T'], data['monetary_value'], time=12, # months discount_rate=0.01 # monthly discount rate ~ 12.7% annually ) CLV_12M = pd.DataFrame({
# Repeat-purchase count per customer: one row is kept per distinct
# (customer, step) pair, the remaining rows are counted per customer, and
# 1 is subtracted from the count.
frequency = df.drop_duplicates(subset=["customer", "step"], keep="first").groupby(["customer"]) \
    .count() - 1

# Give each per-customer table the column name used by the models below.
# ``recency``, ``T`` and ``monetary`` are built before this excerpt.
recency.rename(columns={"step": "recency"}, inplace=True)
frequency.rename(columns={"step": "frequency"}, inplace=True)
T.rename(columns={"step": "T"}, inplace=True)
monetary.rename(columns={"amount": "monetary_value"}, inplace=True)

# Side-by-side join on the shared index into one RFM table.
df_rfm = pd.concat([recency, T, monetary, frequency], axis=1)

# Gamma-Gamma model: expected monetary value per customer.
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(frequency=df_rfm["frequency"],
        monetary_value=df_rfm["monetary_value"])
# Evaluated one row at a time through apply(axis=1).
df_rfm["expected_monetary_value"] = df_rfm.apply(
    lambda row: ggf.conditional_expected_average_profit(
        row["frequency"], row["monetary_value"]),
    axis=1)

# BG/NBD model: expected number of purchases up to t=180.
# NOTE(review): t is presumably in the same unit as ``recency`` and ``T``
# (the ``step`` column) - confirm.
bgf = BetaGeoFitter(penalizer_coef=1)
bgf.fit(frequency=df_rfm["frequency"],
        recency=df_rfm["recency"],
        T=df_rfm["T"])
df_rfm[
    "pred_nb_purchases"] = bgf.conditional_expected_number_of_purchases_up_to_time(
        t=180,
        frequency=df_rfm["frequency"],
        recency=df_rfm["recency"],
        T=df_rfm["T"])

# NOTE(review): the statement below is cut off in this excerpt; it is kept
# exactly as found.
df_rfm["pred_revenue"] = df_rfm.apply(
def create_cltv_p(dataframe):
    """Predict 6-month CLTV per customer and assign a segment.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Invoice lines with the columns ``'Customer ID'``, ``'InvoiceDate'``
        (datetime-like), ``'Invoice'`` and ``'TotalPrice'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'Customer ID'`` with the columns ``recency_cltv_p``,
        ``T``, ``monetary_avg``, ``recency_weekly_cltv_p``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` (rescaled to 1-100) and
        ``cltv_p_segment``. Only customers with more than one invoice and a
        positive average monetary value are kept.
    """
    # fixed analysis date used to compute the tenure T
    today_date = dt.datetime(2011, 12, 11)

    # Recency is per customer: days between the first and last purchase.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,
            lambda date: (today_date - date.min()).days
        ],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum()
    })
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # simplified monetary_avg: total spend divided by number of invoices
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # weekly recency and weekly T for BG/NBD
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Checks: positive average spend and more than one invoice (the original
    # comment says the latter gives a healthier CLTV estimate). Take an
    # explicit copy because columns are assigned below; assigning onto a
    # filtered slice is chained assignment.
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm['frequency'] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # expected sales in 4 weeks (1 month)
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # expected sales in 12 weeks (3 months)
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # rescale CLTV to 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # segments: C is the lowest third, A the highest
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]
    return rfm
# The Gamma-Gamma model assumes monetary value and purchase frequency are
# unrelated, so look at their correlation first.
customer_detail[['avg_order_value', 'frequency']].corr()

# In[15]:

# Gamma-Gamma estimates the average monetary value of customer transactions.
from lifetimes import GammaGammaFitter

gg = GammaGammaFitter(penalizer_coef=0.001)
gg.fit(frequency=customer_detail['frequency'],
       monetary_value=customer_detail['avg_order_value'],
       verbose=True)
print(
    gg.conditional_expected_average_profit(
        frequency=customer_detail['frequency'],
        monetary_value=customer_detail['avg_order_value'],
    ).head(10))

# In[16]:

# Undiscounted CLV over ``t``, cast to whole numbers.
customer_detail['clv'] = gg.customer_lifetime_value(
    mbgnbd,
    frequency=customer_detail['frequency'],
    recency=customer_detail['recency'],
    T=customer_detail['T'],
    monetary_value=customer_detail['avg_order_value'],
    time=t,
    discount_rate=0,
).astype(int)

customer_detail[[
    'frequency', 'pred_90d_bgf', 'monetary', 'avg_order_value', 'clv'
]].head()
def create_cltv_p(dataframe):
    """Predict 6-month CLTV per customer and assign a segment.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Invoice lines with the columns ``"Customer ID"``, ``"InvoiceDate"``
        (datetime-like), ``"Invoice"`` and ``"TotalPrice"``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``"Customer ID"`` with the columns ``recency_cltv_p``,
        ``T``, ``monetary_avg``, ``recency_weekly_cltv_p``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` (rescaled to 1-100) and
        ``cltv_p_segment``. Only customers with more than one invoice and a
        positive average monetary value are kept.
    """
    # fixed analysis date used to compute the tenure T
    today_date = dt.datetime(2011, 12, 11)

    # Recency is user-specific (first to last purchase).
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,  # "recency_cltv_p"
            lambda date: (today_date - date.min()).days,  # "T"
        ],
        'Invoice': lambda num: num.nunique(),  # "frequency"
        'TotalPrice': lambda TotalPrice: TotalPrice.sum(),  # "monetary"
    })
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ["recency_cltv_p", "T", "frequency", "monetary"]

    # Simplified monetary_avg: the Gamma-Gamma model works on the average
    # value per transaction.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and weekly T for the BG/NBD model
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # monetary_avg must be positive and the customer needs more than one
    # invoice. Take an explicit copy because columns are assigned below;
    # assigning onto a filtered slice is chained assignment.
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm["frequency"] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)  # integer, just in case

    # BG/NBD model
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm["frequency"], rfm["recency_weekly_cltv_p"], rfm["T_weekly"])

    # expected sales in 4 weeks (1 month)
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])
    # expected sales in 12 weeks (3 months)
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])

    # Gamma-Gamma model: expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm["frequency"], rfm["monetary_avg"])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm["frequency"], rfm["monetary_avg"])

    # CLTV prediction for 6 months
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm["frequency"],
                                       rfm["recency_weekly_cltv_p"],
                                       rfm["T_weekly"],
                                       rfm["monetary_avg"],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # rescale CLTV to 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # segments: C is the lowest third, A the highest
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
               "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
               "expected_average_profit", "cltv_p", "cltv_p_segment"]]
    return rfm
def predicted_purchase_time(account, timesteap):
    """Predict purchase probability and average spend per customer.

    Reads ``AIexcel/<account>.csv`` (transactions) and
    ``AIexcel/<account>_ga.csv`` (per-customer level and next contact time),
    fits a BG/NBD and a Gamma-Gamma model, and returns the customers that
    have no ``next_time`` on or after the module-level ``today``.

    Column meaning, taken from the original export headers: ``name`` is the
    recipient name, ``uuid`` the recipient mobile number, ``invoiceDate`` the
    payment date, ``produce_name`` the product name and ``Total`` the product
    total price.

    Parameters
    ----------
    account : str
        Base name of the two CSV files inside ``AIexcel/``.
    timesteap
        NOTE(review): currently unused - the prediction horizon below is
        hard-coded to 30. Confirm whether it should be wired in.

    Returns
    -------
    pandas.DataFrame
        Three columns: customer mobile number, purchase probability
        (formatted as a percentage string, capped at 100%) and predicted
        average transaction amount.
    """
    df = pd.read_csv(
        'AIexcel/' + account + '.csv',
        names=['name', 'uuid', 'invoiceDate', 'produce_name', 'Total'],
        sep=',',
        encoding='utf8',
        low_memory=False)
    df_ga = pd.read_csv('AIexcel/' + account + '_ga.csv',
                        names=['uuid', 'level', 'next_time'],
                        sep=',',
                        encoding='utf8',
                        low_memory=False)
    # Row 0 is skipped for the labels and then dropped.
    # NOTE(review): presumably it is the file's own header row, read as data
    # because ``names=`` is given - confirm.
    df_UserLabel = df_ga['level'][1:].tolist()
    df_ga.drop([0], inplace=True)
    if 'level' in df_ga:
        df_ga['level'] = df_ga.apply(ga_toLevel, axis=1)

    # ``.ix`` no longer exists in current pandas; ``.loc`` accepts the same
    # boolean masks. Keep rows whose date string is 19 characters long and
    # whose name is at most 10 characters long.
    df = df.loc[df.invoiceDate.str.len() == 19]
    df = df.loc[df.name.str.len() <= 10]

    # take three columns
    df1 = df[['uuid', 'invoiceDate', 'Total']]
    # NOTE(review): the original comment says "drop price == 1" but the
    # condition tests invoiceDate - confirm which column was intended.
    df1_ = df1.drop(df1[df1['invoiceDate'] == 1].index)
    # Drop rows with missing values. Copy so the column conversions below do
    # not write onto a slice of ``df``.
    dataframe = df1_.dropna().copy()
    dataframe['invoiceDate'] = pd.to_datetime(dataframe['invoiceDate']).dt.date
    dataframe['Total'] = dataframe['Total'].astype(float)

    # frequency / recency / T summary, without and with monetary value
    data = summary_data_from_transaction_data(
        dataframe,
        'uuid',
        'invoiceDate',
        observation_period_end=dataframe.invoiceDate.max())
    data2 = summary_data_from_transaction_data(
        dataframe,
        'uuid',
        'invoiceDate',
        monetary_value_col='Total',
        observation_period_end=dataframe.invoiceDate.max())

    # BG/NBD: expected number of purchases in the next 30 time units
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(data['frequency'], data['recency'], data['T'])
    purchase_time = data
    purchase_time['predicted_purchases'] = (
        bgf.conditional_expected_number_of_purchases_up_to_time(
            30, data['frequency'], data['recency'], data['T']))
    predicted_purchases_df = purchase_time[[
        'predicted_purchases'
    ]].sort_values(by='predicted_purchases', ascending=False)
    # Average gap between purchases. Division by a zero frequency gives
    # inf/NaN, which is replaced by 0 further down.
    predicted_purchases_df['cycle'] = data['recency'] / data['frequency']

    # Gamma-Gamma: expected average spend, fitted on returning customers
    returning_customers_summary = data2[(data2['frequency'] > 0)
                                        & (data2['monetary_value'] != 0)]
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(returning_customers_summary['frequency'],
            returning_customers_summary['monetary_value'])
    income = ggf.conditional_expected_average_profit(
        returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value']).to_frame()
    income.columns = ['predicted_price']

    predicted_purchases_df = predicted_purchases_df.merge(income,
                                                          on=['uuid'],
                                                          how='left')
    predicted_purchases_df.reset_index(inplace=True)

    # Cap the expected purchase count at 1 so it reads as a probability,
    # then format it as a percentage string.
    mask = predicted_purchases_df.predicted_purchases > 1
    predicted_purchases_df.loc[mask, 'predicted_purchases'] = 1
    predicted_purchases_df['predicted_purchases'] = predicted_purchases_df[
        'predicted_purchases'].astype(float)
    predicted_purchases_df = predicted_purchases_df.sort_values(
        by=['predicted_purchases'], ascending=False)
    predicted_purchases_df['predicted_purchases'] = predicted_purchases_df[
        'predicted_purchases'].apply(lambda x: format(x, '.2%'))

    predicted_purchases_df = predicted_purchases_df.merge(df_ga,
                                                          left_on="uuid",
                                                          right_on="uuid",
                                                          how='left')
    predicted_purchases_df['level'] = predicted_purchases_df.apply(flag_df,
                                                                   axis=1)
    predicted_purchases_df.replace(np.nan, 0, inplace=True)
    predicted_purchases_df.replace(np.inf, 0, inplace=True)
    if 'next_time' not in predicted_purchases_df.columns:
        predicted_purchases_df['next_time'] = np.nan
    predicted_purchases_df['next_time'] = pd.to_datetime(
        predicted_purchases_df['next_time'])

    # Split on whether a customer's next_time is today or later. The uuid
    # list was built twice in the original; build the mask once.
    scheduled_uuids = predicted_purchases_df[
        predicted_purchases_df.next_time >= today].uuid.astype(str).tolist()
    is_scheduled = predicted_purchases_df.uuid.isin(scheduled_uuids)
    # Copy: columns of this subset are reassigned below.
    predicted_purchases_df_N = predicted_purchases_df[~is_scheduled].copy()
    predicted_purchases_df_off = predicted_purchases_df[is_scheduled]
    # The original also built an unused ``new_df`` with DataFrame.append,
    # which no longer exists in current pandas; it has been removed.

    predicted_purchases_df_N['cycle'] = (
        predicted_purchases_df_N['cycle'] *
        predicted_purchases_df_N['level']).round(0).astype(int)
    predicted_purchases_df_N[
        'next_time'] = today + predicted_purchases_df_N.apply(time_df, axis=1)
    predicted_purchases_df_NQ = predicted_purchases_df_N.dropna()
    predicted_purchases_df_off = predicted_purchases_df_off.drop(
        columns=['predicted_purchases', 'cycle', 'predicted_price'])
    predicted_purchases_df_NQ = predicted_purchases_df_NQ.drop(
        columns=['predicted_purchases', 'cycle', 'predicted_price'])

    # Rebuild the *_ga table with the refreshed level / next_time values.
    # ``res`` is only needed by the CSV export, which is disabled below.
    df_ga = df_ga.merge(predicted_purchases_df_off,
                        left_on="uuid",
                        right_on="uuid",
                        how='left')
    df_ga = df_ga.merge(predicted_purchases_df_NQ,
                        left_on="uuid",
                        right_on="uuid",
                        how='left')
    notNull_df = df_ga[
        df_ga['level'].notnull() & df_ga['next_time'].notnull()].drop(
            columns=['level_y', 'next_time_y', 'next_time_x', 'level_x'])
    notNull_df2 = df_ga[
        df_ga['level_y'].notnull() & df_ga['next_time_y'].notnull()].drop(
            columns=['level', 'next_time', 'next_time_x', 'level_x'])
    notNull_df2.columns = ['uuid', 'level', 'next_time']
    res = pd.concat([notNull_df, notNull_df2], axis=0, ignore_index=True)
    res.rename(columns={u'uuid': u'收件人手機'}, inplace=True)
    res['UserLabel'] = pd.Series(df_UserLabel)
    res = res[[u'收件人手機', u'UserLabel', u'next_time']]
    # res.to_csv('AIexcel/' + account + '_ga.csv',index=False,encoding='utf8')

    predicted_purchases_df_N = predicted_purchases_df_N.drop(
        columns=['level', 'cycle', 'next_time'])
    predicted_purchases_df_N.columns = [u'收件人手機', u'顧客購買機率', u'平均交易金額']
    return predicted_purchases_df_N


# print(predicted_purchase_time(account,30)[:30])
plt.savefig('recencymatrix.png')
plt.close()

# Probability-alive matrix, saved to disk rather than shown.
plot_probability_alive_matrix(bgf)
plt.savefig('probability.png')

# One customer picked by position, used for the single-customer printouts.
individual = summary.iloc[20]

# Horizon for the purchase prediction.
t = 7
print("\n\n\nselected customer probability in next week")
print(
    bgf.conditional_expected_number_of_purchases_up_to_time(
        t,
        frequency=individual['frequency'],
        recency=individual['recency'],
        T=individual['T'],
    ))

# The same prediction for every customer.
summary['predicted_purchases'] = (
    bgf.conditional_expected_number_of_purchases_up_to_time(
        t,
        frequency=summary['frequency'],
        recency=summary['recency'],
        T=summary['T'],
    ))
print(summary.head())

# Gamma-Gamma model fitted on customers with at least one repeat purchase.
summary2 = summary[summary['frequency'] > 0]
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(frequency=summary2['frequency'],
        monetary_value=summary2['monetary_value'])
print(ggf)

print("\n\n\nSelected customer clv")
print(
    ggf.conditional_expected_average_profit(
        frequency=individual['frequency'],
        monetary_value=individual['monetary_value'],
    ))

# Computed for the returning customers only and assigned by index alignment.
summary['clv'] = (ggf.conditional_expected_average_profit(
    frequency=summary2['frequency'],
    monetary_value=summary2['monetary_value'],
))
print(summary.head())
################################################################ # Tahmin Sonuçlarının Değerlendirilmesi ################################################################ plot_period_transactions(bgf) plt.show() ############################################################## # 3. GAMMA-GAMMA Modelinin Kurulması ############################################################## ggf = GammaGammaFitter(penalizer_coef=0.01) ggf.fit(rfm['frequency'], rfm['monetary_avg']) ggf.conditional_expected_average_profit(rfm['frequency'], rfm['monetary_avg']).head(10) ggf.conditional_expected_average_profit( rfm['frequency'], rfm['monetary_avg']).sort_values(ascending=False).head(10) rfm["expected_average_profit"] = ggf.conditional_expected_average_profit( rfm['frequency'], rfm['monetary_avg']) rfm.sort_values("expected_average_profit", ascending=False).head(20) ############################################################## # 4. BG-NBD ve GG modeli ile CLTV'nin hesaplanması. ############################################################## cltv = ggf.customer_lifetime_value(
def create_cltv_p(dataframe):
    """Predict 6-month CLTV per customer and assign a segment.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Invoice lines with the columns ``'Customer ID'``, ``'InvoiceDate'``
        (datetime-like), ``'Invoice'`` and ``'TotalPrice'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'Customer ID'`` with the columns ``recency_cltv_p``,
        ``T``, ``monetary_avg``, ``recency_weekly_cltv_p``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` (rescaled to 1-100) and
        ``cltv_p_segment``. Only customers with more than one invoice and a
        positive average monetary value are kept.
    """
    # fixed analysis date used to compute the tenure T
    today_date = dt.datetime(2011, 12, 11)

    # Recency is dynamic per user (first to last purchase).
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                        lambda date: (today_date - date.min()).days],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum()})
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # simplified monetary_avg: total spend divided by number of invoices
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # weekly recency and weekly T for BG/NBD
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Controls: positive average spend and more than one invoice. Take an
    # explicit copy because columns are assigned below; assigning onto a
    # filtered slice is chained assignment.
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm['frequency'] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # expected sales in 4 weeks (1 month)
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # expected sales in 12 weeks (3 months)
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # rescale CLTV to 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # segments: C is the lowest third, A the highest
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
               "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
               "expected_average_profit", "cltv_p", "cltv_p_segment"]]
    return rfm
def create_cltv_p(dataframe):
    """Predict 6-month CLTV per customer and assign a segment.

    Aggregates invoice lines per customer, fits a BG/NBD model (expected
    number of sales) and a Gamma-Gamma model (expected average profit),
    combines them into a CLTV, rescales it to 1-100 and splits it into
    three quantile-based segments.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Invoice lines with the columns ``'Customer ID'``, ``'InvoiceDate'``
        (datetime-like), ``'Invoice'`` and ``'TotalPrice'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'Customer ID'`` with the columns ``recency_cltv_p``,
        ``T``, ``monetary_avg``, ``recency_weekly_cltv_p``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` and ``cltv_p_segment``.
        Only customers with more than one invoice and a positive average
        monetary value are kept.
    """
    # fixed analysis date used to compute the tenure T
    today_date = dt.datetime(2011, 12, 11)

    # Per customer: days between first and last purchase, days since first
    # purchase, number of distinct invoices, total spend.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,
            lambda date: (today_date - date.min()).days
        ],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda price: price.sum()
    })
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # average spend per invoice
    rfm['monetary'] = rfm['monetary'] / rfm['frequency']
    rfm.rename(columns={'monetary': 'monetary_avg'}, inplace=True)

    # weekly recency and tenure, the unit used by the models below
    rfm["recency_weekly_cltv_p"] = rfm['recency_cltv_p'] / 7
    rfm['T_weekly'] = rfm['T'] / 7

    # Explicit copy: columns are assigned below, and assigning onto a
    # filtered slice is chained assignment.
    rfm = rfm[(rfm['monetary_avg'] > 0) & (rfm['frequency'] > 1)].copy()
    rfm['frequency'] = rfm['frequency'].astype(int)

    # BG/NBD: expected sales in 4 and 12 weeks
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # Gamma-Gamma: expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV; recency and T are in weeks
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq='W',
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # rescale CLTV to 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # segments: C is the lowest third, A the highest
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]
    return rfm
from lifetimes import BetaGeoFitter

# BG/NBD model: expected number of purchases per customer over the horizon.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(data['frequency'], data['recency'], data['T'])

# Horizon in the same time unit as ``recency`` and ``T``.
future_horizon = 10000
data['predicted_purchases'] = bgf.predict(future_horizon,
                                          data['frequency'],
                                          data['recency'],
                                          data['T'])
data.head()

from lifetimes import GammaGammaFitter

# Gamma-Gamma model, fitted on customers with at least one repeat purchase.
returning_customers_summary = data[data['frequency'] > 0]
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])

# A single population-wide average spend per transaction.
transaction_spend = ggf.conditional_expected_average_profit(
    data['frequency'], data['monetary_value']).mean()
print(transaction_spend)

# Replace the existing ``clv`` column with predicted purchases x average spend.
customers_pm = customers_ac.join(data['predicted_purchases'],
                                 on='id',
                                 how='left').drop(columns='clv')
# Vectorised multiply instead of a row-wise apply(axis=1): same values, no
# Python call per row, and it also works on an empty frame.
customers_pm['clv'] = customers_pm['predicted_purchases'] * transaction_spend
customers_pm.tail()
#plot_frequency_recency_matrix(bgf)
#pylab.show()
#plot_probability_alive_matrix(bgf)
#pylab.show()

# Customer identifier given on the command line.
val = sys.argv[1]

# Find the row position of that customer. The original looped over
# ``for row in summary``, which yields the column labels of a DataFrame
# rather than its rows, so the requested customer was never matched.
# NOTE(review): assumes the customer identifier is the index of ``summary``
# - confirm.
index = next(
    (position for position, customer_id in enumerate(summary.index)
     if customer_id == val or str(customer_id) == val),
    None)
if index is None:
    raise SystemExit("customer {!r} not found in summary".format(val))
individual = summary.iloc[index]
#print(individual)

t = 7
#print("\n\n\nselected customer probability in next week")
#print(bgf.conditional_expected_number_of_purchases_up_to_time(t,individual['frequency'],individual['recency'],individual['T']))
#summary['predicted_purchases']=(bgf.conditional_expected_number_of_purchases_up_to_time(t, summary['frequency'], summary['recency'], summary['T']))
#print (summary.head())

# Gamma-Gamma model fitted on customers with at least one repeat purchase.
summary2 = summary[summary['frequency'] > 0]
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(summary2['frequency'], summary2['monetary_value'])
#print (ggf)
#print("\n\n\nSelected customer clv")

# Expected average profit for the selected customer.
print(
    ggf.conditional_expected_average_profit(individual['frequency'],
                                            individual['monetary_value']))
#summary['clv']=(ggf.conditional_expected_average_profit(summary2['frequency'],summary2['monetary_value']))
#print(summary.head())
def create_cltv_p(dataframe, today_date=None):
    """Build a per-customer CLTV prediction table from invoice-level data.

    Derives recency, T, frequency and an average monetary value per customer,
    fits a BG/NBD and a Gamma-Gamma model on them, and returns expected
    sales, expected average profit and a 6-month CLTV that is min-max scaled
    to 1..100 and cut into three equal-sized segments ("C", "B", "A").

    Args:
        dataframe: frame with 'Customer ID', 'InvoiceDate', 'Invoice' and
            'TotalPrice' columns.
        today_date: analysis date used to compute T. Defaults to
            2011-12-11, the value that used to be hard-coded.

    Returns:
        DataFrame indexed by integer 'Customer ID' with the columns listed in
        the final selection below. Only customers with a positive average
        monetary value are kept.
    """
    if today_date is None:
        today_date = dt.datetime(2011, 12, 11)

    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            # BG/NBD recency is the customer's age at their last purchase,
            # i.e. last purchase minus first purchase. It is NOT the
            # "days since last purchase" used in classic RFM scoring.
            lambda date: (date.max() - date.min()).days,
            # T: customer age, first purchase to the analysis date.
            lambda date: (today_date - date.min()).days,
        ],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum(),
    })
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency', 'T', 'frequency', 'monetary']

    # Average monetary value: mean of TotalPrice within each invoice, then
    # the mean of those invoice-level means per customer.
    temp_df = dataframe.groupby(["Customer ID", "Invoice"]).agg(
        {"TotalPrice": ["mean"]})
    temp_df = temp_df.reset_index()
    temp_df.columns = temp_df.columns.droplevel(0)
    temp_df.columns = ["Customer ID", "Invoice", "total_price_mean"]
    temp_df2 = temp_df.groupby(["Customer ID"], as_index=False).agg(
        {"total_price_mean": ["mean"]})
    temp_df2.columns = temp_df2.columns.droplevel(0)
    temp_df2.columns = ["Customer ID", "monetary_avg"]

    rfm = rfm.merge(temp_df2, how="left", on="Customer ID")
    rfm.set_index("Customer ID", inplace=True)
    rfm.index = rfm.index.astype(int)

    # The models below are fitted on weekly units.
    rfm["recency_weekly"] = rfm["recency"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Keep customers with a positive average spend; copy so the assignments
    # below operate on an independent frame rather than a slice.
    rfm = rfm[rfm["monetary_avg"] > 0].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD: expected number of purchases over 4 and 12 weeks.
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(rfm['frequency'], rfm['recency_weekly'], rfm['T_weekly'])
    rfm["exp_sales_1_month"] = bgf.predict(
        4, rfm['frequency'], rfm['recency_weekly'], rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(
        12, rfm['frequency'], rfm['recency_weekly'], rfm['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV with weekly-unit inputs (freq="W").
    cltv = ggf.customer_lifetime_value(
        bgf,
        rfm['frequency'],
        rfm['recency_weekly'],
        rfm['T_weekly'],
        rfm['monetary_avg'],
        time=6,
        freq="W",
        discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # Rescale the raw CLTV to 1..100, then segment into terciles.
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["monetary_avg", "T", "recency_weekly", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month",
               "expected_average_profit", "cltv_p", "cltv_p_segment"]]
    return rfm
# Gamma-Gamma modelling on returning customers (frequency > 0) of `modeldata`.
returning_customers_summary = modeldata[modeldata['frequency']>0]
print(len(returning_customers_summary))
returning_customers_summary.shape
from lifetimes import GammaGammaFitter
ggf = GammaGammaFitter(penalizer_coef = 0)
# NOTE(review): the model is fitted BEFORE the monetary_value > 0 filter a few
# lines below is applied - confirm that non-positive monetary values cannot
# reach this fit.
ggf.fit(returning_customers_summary['frequency'], returning_customers_summary['monetary_value'])
print(ggf)
returning_customers_summary = returning_customers_summary[returning_customers_summary['monetary_value']>0]
# NOTE(review): this assigns a column on a filtered slice of `modeldata`;
# pandas may emit SettingWithCopyWarning here.
returning_customers_summary['predicted_avg_sales']=ggf.conditional_expected_average_profit(returning_customers_summary['frequency'],returning_customers_summary['monetary_value'])
# Compare the expected average value with the actual average value in the
# data as a sanity check on the fitted model.
print(f"Expected Average sales: {returning_customers_summary['predicted_avg_sales'].mean()}")
print(f"Actual Average sales: {returning_customers_summary['monetary_value'].mean()}")
# The values seem to be fine
# Calculating CLV for 1 month; `bgf` is a purchase-count model defined
# outside this fragment.
# NOTE(review): the call below is cut off in this part of the file - its
# remaining arguments and closing parenthesis are not visible here.
returning_customers_summary['Predicted_CLV'] = ggf.customer_lifetime_value(bgf, returning_customers_summary['frequency'], returning_customers_summary['recency'], returning_customers_summary['T'], returning_customers_summary['monetary_value'], time=1, # lifetime in months
# Expected sales for the whole company over a horizon of 4 periods.
# NOTE(review): the inputs are the *_weekly columns, so 4 presumably means
# 4 weeks rather than the "1 week" the earlier comment claimed - confirm.
bgf.conditional_expected_number_of_purchases_up_to_time(
    4,
    cltv["frequency"],
    cltv["recency_weekly"],
    cltv["T_weekly"],
).sort_values(ascending=False).sum()

# Predicted vs. actual number of customers per transaction count.
plot_period_transactions(bgf)
plt.show()

######
# GAMMA-GAMMA
#####

# Fit the spend model on purchase frequency and average monetary value.
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(cltv["frequency"], cltv["monetary_avg"])

# Expected average profit per transaction, per customer.
cltv["expected_average_profit"] = ggf.conditional_expected_average_profit(
    cltv["frequency"],
    cltv["monetary_avg"],
)
cltv.sort_values(by="expected_average_profit", ascending=False).head()

###########
# 4. CLTV calculation with BG-NBD and GG models
###########

# 6-month CLTV from both models, using weekly-unit inputs (freq="W").
cltv["cltv_six_months"] = ggf.customer_lifetime_value(
    bgf,
    cltv["frequency"],
    cltv["recency_weekly"],
    cltv["T_weekly"],
    cltv["monetary_avg"],
    time=6,
    discount_rate=0.01,
    freq="W",
)
class CLV(object):
    """
    Customer-lifetime-value estimation for one product market group.

    INPUT
        pmg_num (int) the product market group number, default = 1
        outfile1 (str) the filename indicating where to store the raw data
            before analysis, default = '../data/clvtrainingset01.csv'
        outfile2 (str) the filename containing the results,
            default = '../data/clv01.csv'
        date_range (list) the start date and end date of the years to
            analyze, default = ['2008-09-01','2016-09-01']

    attributes other than those listed above
        self.data (DataFrame) a pandas DataFrame object of the data to be
            used for analysis
        self.bgf (from lifetimes) a statistical model object from the
            lifetimes package
        self.ggf (from lifetimes) a statistical model object from the
            lifetimes package
        self.results (DataFrame) a pandas DataFrame object of the results of
            analysis
    """

    def __init__(self, pmg_num=1, outfile1='../data/clvtrainingset01.csv',
                 outfile2='../data/clv01.csv', date_range=None):
        # A fresh list is built per instance: a list literal as the default
        # argument would be shared (and mutable) across every CLV object.
        if date_range is None:
            date_range = ['2008-09-01', '2016-09-01']
        self.pmg_num = pmg_num
        # outfile1 stores a clean version of the raw data used for analysis;
        # this is important for reproducibility
        self.outfile1 = outfile1
        # outfile2 stores the clv estimation results
        self.outfile2 = outfile2
        self.date_range = date_range
        self.data = None
        self.bgf = None
        self.ggf = None
        self.results = None

    def get_data_from_server(self, cmd=None):
        """
        Gets data from sales_db and stores the query results in self.data

        The raw transactions of customers accepted by `use(name)` are also
        written to self.outfile1.

        INPUT
            cmd (str) the sql query to run; when omitted, the default query
                below is used.

        The default query has been replaced. The original query was an
        8 line select command.
        """
        # server name
        dsn = "THE SERVER NAME"
        cnxn_name = "DSN=%s" % dsn
        connection = odbc.connect(cnxn_name)  # use to access the database
        try:
            c = connection.cursor()  # generate cursor object

            # Grab transaction data from Postgres
            if not cmd:
                # NOTE(review): the redacted placeholder below has no format
                # specifiers, so this line only works once the real query
                # (with three placeholders) is restored.
                cmd = """SQL DEFAULT COMMAND GOES HERE""" % (self.pmg_num,self.date_range[0],self.date_range[1])
            c.execute(cmd)  # execute the sql command

            # dictionary to convert customer ids to names
            to_name = dict(np.genfromtxt('../data/names.csv', dtype=str, delimiter='\t'))

            # list to store the query data
            transaction_data = []
            for cust, rsv_date, sales in c:
                name = to_name[str(int(cust))]
                # `use` decides whether this customer is kept
                if use(name):
                    transaction_data.append({
                        "id": cust,
                        "date": rsv_date,
                        "sales": float(sales),  # the transaction amount
                    })
        finally:
            # Release the database connection even when the query or the row
            # processing fails.
            connection.close()

        # convert to dataframe
        df = pd.DataFrame(transaction_data, columns=['id', 'date', 'sales'])
        # store results
        df.to_csv(self.outfile1, index=False)
        # IMPORTANT: use correct observation_period_end date
        self.data = summary_data_from_transaction_data(
            df, 'id', 'date', 'sales',
            observation_period_end=self.date_range[1], freq='M')

    def get_data_from_file(self, filename, **kwargs):
        """
        Reads transactions from a csv file and stores their summary in
        self.data

        INPUT
            filename (str) csv file with 'id', 'date' and 'sales' columns
            **kwargs passed through to pandas.read_csv
        """
        df = pd.read_csv(filename, **kwargs)
        self.data = summary_data_from_transaction_data(
            df, 'id', 'date', 'sales',
            observation_period_end=self.date_range[1], freq='M')

    def fit(self, months=96):
        """
        Computes CLV estimates for the next n months and stores results in
        self.results (also written to self.outfile2)

        INPUT
            months (int) number of months to predict, default = 96 (8 years)
        """
        ### PREDICT NUMBER OF PURCHASES
        self.bgf = BetaGeoFitter()  # see lifetimes module documentation for details
        self.bgf.fit(self.data['frequency'], self.data['recency'], self.data['T'])
        # 8 years = 96 months
        self.data['predicted_purchases'] = self.bgf.conditional_expected_number_of_purchases_up_to_time(
            months, self.data['frequency'], self.data['recency'], self.data['T'])

        ### PREDICT FUTURE PURCHASE AMOUNT
        # The Gamma-Gamma model is fitted on returning customers only:
        # customers without a repeat purchase carry a monetary_value of 0,
        # which the model does not accept as training input.
        returning = self.data[(self.data['frequency'] > 0) &
                              (self.data['monetary_value'] > 0)]
        self.ggf = GammaGammaFitter(penalizer_coef = 0)
        self.ggf.fit(returning['frequency'], returning['monetary_value'])
        # predict next transaction (for every customer, not just the
        # returning ones used for fitting)
        self.data['predicted_trans_profit'] = self.ggf.conditional_expected_average_profit(
            frequency = self.data['frequency'],
            monetary_value = self.data['monetary_value'])

        ### ESTIMATE CLV
        self.data['clv_estimation'] = self.data['predicted_trans_profit'] * self.data['predicted_purchases']
        self.data['prob_alive'] = self.bgf.conditional_probability_alive(
            self.data['frequency'], self.data['recency'], self.data['T'])
        self.results = self.data.sort_values(by='clv_estimation', ascending=False)
        # store results
        # NOTE(review): index=False drops the frame's index from the csv;
        # confirm the customer id is not needed in the output file.
        self.results.to_csv(self.outfile2, index=False)

    def plot_matrices(self):
        """
        plots three matrices:
            probability alive matrix: displays the probability that a
                customer is active
            frequency recency matrix: displays frequency and recency with
                color corresponding to monetary value
            period transactions: displays predicted and actual transaction
                values over time
        (check documentation in lifetimes for more details)
        """
        plot_probability_alive_matrix(self.bgf, cmap='viridis')
        plot_frequency_recency_matrix(self.bgf, cmap='viridis')
        plot_period_transactions(self.bgf)
##############################################################
# GAMMA GAMMA MODEL
##############################################################

# To make sure the Gamma-Gamma model can be used, check whether frequency
# and monetary values are correlated. (?)
combined_data[['monetary_value_cal', 'frequency_cal']].corr()
# Correlation is low, so continue.

# Fit the model on the calibration-period columns.
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(
    combined_data['frequency_cal'],
    combined_data['monetary_value_cal'],
)

# Predict the expected average spend per customer.
monetary_pred = ggf.conditional_expected_average_profit(
    combined_data['frequency_cal'],
    combined_data['monetary_value_cal'],
)

# Side-by-side frame for inspecting actual vs. predicted values.
df_comp_m = pd.DataFrame()
df_comp_m["ActualMonetary"] = combined_data['monetary_value_holdout']
df_comp_m["Predicted"] = monetary_pred
df_comp_m.head(20)

# Summary statistics against the holdout column.
print("Expected Average Sales: %s" % monetary_pred.mean())
print("Actual Average Sales: %s" % combined_data["monetary_value_holdout"].mean())
print("Difference: %s" % (
    combined_data["monetary_value_holdout"].mean() - monetary_pred.mean()))
print("Mean Squared Error: %s" % mean_squared_error(
    combined_data["monetary_value_holdout"], monetary_pred))
# Expected number of purchases per customer over the next t periods, from a
# `bgf` model defined outside this fragment.
t = 1
df["predicted_purchases"] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, df["FREQUENCY"], df["RECENCY"], df["T"])
df.sort_values(by="predicted_purchases").tail(10)

# ==========================================================================
# Gamma-Gamma Model
# Model assumes that there is no relationship between the monetary value and the purchase frequency
# ==========================================================================
# Correlation check for the independence assumption stated above.
df[["MONETARY_VALUE", "FREQUENCY"]].corr()

# NOTE(review): the model is fitted on every row of df, with no filter on
# FREQUENCY or MONETARY_VALUE - confirm df holds returning customers only.
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(df["FREQUENCY"], df["MONETARY_VALUE"])
ggf.conditional_expected_average_profit(df["FREQUENCY"], df["MONETARY_VALUE"]).head(10)

# Compare the model's mean expected profit with the observed mean among
# customers whose FREQUENCY is above zero.
print("Expected conditional average profit: %s, Average profit: %s" % (ggf.conditional_expected_average_profit(df["FREQUENCY"], df["MONETARY_VALUE"]).mean(), df[df["FREQUENCY"] > 0]["MONETARY_VALUE"].mean()))

# Refit the purchase-count model on df before using it for CLV.
bgf.fit(df["FREQUENCY"], df["RECENCY"], df["T"])

# NOTE(review): lifetimes documents `time` in months, while the comment on
# the last line says "year" - confirm the intended horizon.
# NOTE(review): the call below is cut off in this part of the file - its
# remaining arguments and closing parenthesis are not visible here.
pred = ggf.customer_lifetime_value(
    bgf, #the model to use to predict the number of future transactions
    df["FREQUENCY"], df["RECENCY"], df["T"], df["MONETARY_VALUE"],
    time=1, # year
# Gamma-Gamma needs returning customers with a positive average spend.
summary_ggf = summary.loc[(summary.frequency > 0) & (summary.monetary_value > 0)]

# Correlation check between purchase frequency and monetary value.
summary_ggf[['frequency', 'monetary_value']].corr()
summary_ggf.monetary_value.hist()

from lifetimes import GammaGammaFitter

ggf = GammaGammaFitter(penalizer_coef=0.0)
ggf.fit(summary_ggf['frequency'], summary_ggf['monetary_value'])
ggf.conditional_expected_average_profit(
    summary_ggf['frequency'], summary_ggf['monetary_value']).head(10)

# Refit the purchase-count model on the same subset. The identical fit was
# previously issued twice in a row; once is enough.
bgf.fit(summary_ggf['frequency'], summary_ggf['recency'], summary_ggf['T'])

ggf.customer_lifetime_value(
    bgf,  # the model to use to predict the number of future transactions
    summary_ggf['frequency'],
    summary_ggf['recency'],
    summary_ggf['T'],
    summary_ggf['monetary_value'],
    time=12,  # months
    discount_rate=0.01  # monthly discount rate ~ 12.7% annually
).head(10)
def generate_clv_table(data, clv_prediction_time=None, model_penalizer=None):
    """Build a per-client table of predicted and historical customer value.

    Rows with no searches are dropped, the activity stream is summarised per
    'client_id', a BG/NBD model predicts activity over the next 14 periods
    and the alive probability, and a Gamma-Gamma model combined with BG/NBD
    predicts the customer lifetime value. The prediction is merged with the
    historical 'Searches' and 'Revenue' totals.

    Args:
        data: frame with 'client_id', 'activity_date', 'Searches' and
            'Revenue' columns.
        clv_prediction_time: CLV horizon passed to
            ``customer_lifetime_value`` as ``time``; defaults to 12.
        model_penalizer: ``penalizer_coef`` of the BG/NBD model; defaults
            to 0.

    Returns:
        DataFrame indexed by client id with the 13 columns named in the
        rename near the end of the function.
    """
    # set default values if they are not stated
    if clv_prediction_time is None:
        clv_prediction_time = 12
    if model_penalizer is None:
        model_penalizer = 0

    # Remove non search sessions
    data = data[data['Searches'] > 0]
    max_date = data['activity_date'].max()

    # Aggregate the activity stream into frequency / recency / T /
    # monetary_value per client.
    summary = summary_data_from_transaction_data(
        data, 'client_id', 'activity_date', 'Revenue',
        observation_period_end=max_date)

    # Building the Model using BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=model_penalizer)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Conditional expected purchases for each individual over the next
    # t periods.
    t = 14
    summary['predicted_searches'] = bgf.conditional_expected_number_of_purchases_up_to_time(
        t, summary['frequency'], summary['recency'], summary['T'])

    # Conditional alive probability
    summary['alive_prob'] = summary.apply(
        lambda row: calc_alive_prob(row, bgf), axis=1)
    summary['alive_prob'] = summary['alive_prob'].astype(float)

    # There cannot be non-positive values in the monetary_value or frequency
    # vector
    summary_with_value_and_returns = summary[(summary['monetary_value'] > 0) &
                                             (summary['frequency'] > 0)]

    # recency, frequency and T come from the same frame, so they are zero
    # length exactly when the frame has no rows.
    no_model_rows = len(summary_with_value_and_returns) == 0
    if no_model_rows:
        logger.debug(data['client_id'])

    # Setting up Gamma Gamma model
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary_with_value_and_returns['frequency'],
            summary_with_value_and_returns['monetary_value'])

    # Refit the BG/NBD model on the same subset when it is non-empty. The
    # previous condition negated a generator object, which is always truthy,
    # so this refit silently never ran.
    # NOTE(review): enabling the refit changes the predicted CLV values
    # compared with earlier runs - confirm this is the intended model.
    if not no_model_rows:
        bgf.fit(summary_with_value_and_returns['frequency'],
                summary_with_value_and_returns['recency'],
                summary_with_value_and_returns['T'])

    # Getting Customer lifetime value using the Gamma Gamma output
    customer_predicted_value = ggf.customer_lifetime_value(
        bgf,  # the model to use to predict the number of future transactions
        summary_with_value_and_returns['frequency'],
        summary_with_value_and_returns['recency'],
        summary_with_value_and_returns['T'],
        summary_with_value_and_returns['monetary_value'],
        time=clv_prediction_time,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    )

    # Converting to dataframe with client_id as index
    df_cpv = pd.DataFrame({
        'client_id': customer_predicted_value.index,
        'pred_values': customer_predicted_value.values
    })
    df_cpv = df_cpv.set_index('client_id')

    # Merge with original summary
    df_merged = pd.merge(summary, df_cpv,
                         left_index=True, right_index=True, how='outer')

    # Historical CLV. The columns are selected with a list; selecting them
    # with a bare tuple is not supported by current pandas.
    data_hist = data.groupby(
        ['client_id'])[['Searches', 'Revenue']].apply(
            lambda x: x.astype(float).sum())

    # Merge with original summary
    df_final = pd.merge(df_merged, data_hist,
                        left_index=True, right_index=True, how='outer')

    # Prevent NaN on the pred_values column; .loc writes into df_final
    # directly instead of relying on chained assignment.
    df_final.loc[df_final['frequency'] == 0, 'pred_values'] = 0.0

    # Create column that combines historical and predicted customer value
    df_final['total_clv'] = df_final['pred_values'] + df_final['Revenue']

    # Number of periods since the client was last active
    df_final['last_active'] = df_final['T'] - df_final['recency']

    # Label users inactive for more than 14 periods as "Expired" ELSE "Active"
    df_final['user_status'] = np.where(df_final['last_active'] > 14,
                                       'Expired', 'Active')

    # Set calc_date to the max activity date
    df_final['calc_date'] = max_date.date()  #pd.Timestamp('today').date()

    # Rename columns as appropriate
    df_final.columns = [
        'frequency', 'recency', 'customer_age', 'avg_session_value',
        'predicted_searches_14_days', 'alive_probability',
        'predicted_clv_12_months', 'historical_searches', 'historical_clv',
        'total_clv', 'days_since_last_active', 'user_status', 'calc_date'
    ]

    # Prevent non returning customers from having 100% alive probability
    df_final.loc[df_final['frequency'] == 0, 'alive_probability'] = 0.0
    return df_final
summary_with_money_value.head() ## Filtering out customers who have only 1 purchase returning_customers_summary = summary_with_money_value[ summary_with_money_value['frequency'] > 0] ############################### Average Profit Calulation ########## #At this point we can train our Gamma-Gamma submodel and predict the conditional, expected average lifetime value of our customers. from lifetimes import GammaGammaFitter ggf = GammaGammaFitter(penalizer_coef=0) ggf.fit(returning_customers_summary['frequency'], returning_customers_summary['monetary_value']) print(ggf) #We can now estimate the average transaction value: AVG_Profit = ggf.conditional_expected_average_profit( returning_customers_summary['frequency'], returning_customers_summary['monetary_value']) AVG_Profit = pd.Series(AVG_Profit) ############################### Customer Life Time Value Calculationn ########## # refit the BG model to the summary_with_money_value dataset, #the model to use to predict the number of future transactions from lifetimes import BetaGeoFitter bgf = BetaGeoFitter(penalizer_coef=0.0) bgf.fit(returning_customers_summary['frequency'], returning_customers_summary['recency'], returning_customers_summary['T']) CLV_1Year = ggf.customer_lifetime_value( bgf, returning_customers_summary['frequency'], returning_customers_summary['recency'],