Python GammaGammaFitter.conditional_expected_average_profitの例、lifetimes.GammaGammaFitter.conditional_expected_average_profit Pythonの例

コード例 #1

0

ファイルを表示

ファイル: CustomerLifetimeValueAnalysis.py プロジェクト: USPA-Technology/A_nalysis

class transactionMonetary(object):
    '''
    Build a per-customer monetary summary from order data and derive
    Gamma-Gamma / BG-NBD based estimates from it.

    Expected call order: summary_trans_create -> fit_ggf -> summaryOutput
    (each later step reads attributes set by the earlier ones).
    '''

    def summary_trans_create(self, df, observation_period_end='2017-02-08'):
        '''
        Subset df on sales data, return trans summary with monetary spend.

        df: order-level frame; this method reads the columns OrderType,
            OrderTotal, OrderDate and CustomerNo.
        observation_period_end: end of the observation window handed to
            summary_data_from_transaction_data. Defaults to the date that
            used to be hard-coded here.

        Stores the full summary on self.summary_monetary and returns the
        rows with frequency > 0 (also kept on self.return_customers).
        '''
        # presumably keeps the rows whose OrderType equals 1 -- subset_data
        # is defined elsewhere, confirm there
        sales = subset_data(df, 'OrderType', 1)
        sales = sales[sales.OrderTotal > 0]
        transaction_data_monetary = sales[['OrderDate', 'CustomerNo', 'OrderTotal']]
        self.summary_monetary = summary_data_from_transaction_data(
            transaction_data_monetary,
            'CustomerNo',
            'OrderDate',
            'OrderTotal',
            observation_period_end=observation_period_end)
        # keep customers with more than one spend
        self.return_customers = self.summary_monetary[self.summary_monetary['frequency'] > 0]
        return self.return_customers

    def fit_ggf(self):
        '''
        Fit an unpenalised Gamma-Gamma model on the returning customers and
        keep it on self.ggf. Requires summary_trans_create to have run.
        '''
        self.ggf = GammaGammaFitter(penalizer_coef = 0)
        self.ggf.fit(self.return_customers['frequency'], self.return_customers['monetary_value'])

    def summaryOutput(self, discount_rate=0.12, months=12):
        '''
        Fit beta geometric model to calculate CLV, and use GG model to
        calculate expected profit per customer.
        Write out CLV and profits to csv, print out averages to screen.

        discount_rate: annual discount rate; converted below to the monthly
            rate that customer_lifetime_value expects.
        months: CLV horizon, in months.

        Adds the columns avg_transaction_value and clv to
        self.summary_monetary. Requires fit_ggf to have run.
        '''
        beta_model = BetaGeoFitter()
        # calculate average transaction value once; reused for the printout
        expected_avg_profit = self.ggf.conditional_expected_average_profit(
            self.summary_monetary['frequency'],
            self.summary_monetary['monetary_value'])
        self.summary_monetary['avg_transaction_value'] = expected_avg_profit
        # fit beta geo model
        beta_model.fit(
            self.summary_monetary['frequency'],
            self.summary_monetary['recency'],
            self.summary_monetary['T'])
        # customer_lifetime_value discounts per month, so the annual rate is
        # split over 12 months. It must not be divided further by 30 (that
        # yields a daily-scale rate) nor by the horizon length.
        monthly_discount_rate = discount_rate / 12
        self.summary_monetary['clv'] = self.ggf.customer_lifetime_value(
            beta_model,  # the model to use to predict the number of future transactions
            self.summary_monetary['frequency'],
            self.summary_monetary['recency'],
            self.summary_monetary['T'],
            self.summary_monetary['monetary_value'],
            time=months,  # months
            discount_rate=monthly_discount_rate
        )
        # print customer data with calculations
        # NOTE(review): index=False leaves the frame's index out of the file;
        # if the index carries the customer id it is lost -- confirm intent.
        self.summary_monetary.to_csv("CLV_AVG_transactionValue_perCustomer.csv", index=False)
        # print summary stats
        returning = self.summary_monetary[self.summary_monetary['frequency'] > 0]
        print("Expected conditional average profit: {}, Average profit: {}".format(
            expected_avg_profit.mean(),
            returning['monetary_value'].mean()))

コード例 #2

0

ファイルを表示

ファイル: CRM_Analysis_with_Argparse.py プロジェクト: anlgev/CRM_Analysis_with_Argparse

def create_cltv_pred(dataframe, w=4, m=1):
    """
    Fit BG/NBD and Gamma-Gamma models and add CLTV predictions.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns frequency, recency_weekly, T_weekly and
        monetary_avg. The caller's frame is not modified.
    w : int
        Number of weeks for the BG/NBD expected-sales prediction.
    m : int
        Number of months for the CLTV prediction.

    Returns
    -------
    pandas.DataFrame
        Rows with monetary_avg > 0, extended with exp_sales_{w}_week,
        expected_average_profit, cltv_p_{m}_month, cltv_p_score and
        cltv_p_segment; any columns named recency, frequency or monetary
        are removed.
    """
    # Work on an explicit copy: assigning columns onto a filtered slice
    # triggers pandas' SettingWithCopy warning.
    dataframe = dataframe[dataframe["monetary_avg"] > 0].copy()
    dataframe["frequency"] = dataframe["frequency"].astype(int)

    # BG/NBD: expected number of sales in the next w weeks
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(dataframe['frequency'], dataframe['recency_weekly'],
            dataframe['T_weekly'])

    dataframe[f'exp_sales_{w}_week'] = bgf.predict(w, dataframe['frequency'],
                                                   dataframe['recency_weekly'],
                                                   dataframe['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(dataframe['frequency'], dataframe['monetary_avg'])
    dataframe["expected_average_profit"] = ggf.conditional_expected_average_profit(
        dataframe['frequency'], dataframe['monetary_avg'])

    # CLTV prediction over m months; recency and T are passed in weeks,
    # hence freq="W"
    cltv = ggf.customer_lifetime_value(bgf,
                                       dataframe['frequency'],
                                       dataframe['recency_weekly'],
                                       dataframe['T_weekly'],
                                       dataframe['monetary_avg'],
                                       time=m,
                                       freq="W",
                                       discount_rate=0.01)

    dataframe[f'cltv_p_{m}_month'] = cltv

    # Rescale the CLTV into a 1-100 score
    scaler = MinMaxScaler(feature_range=(1, 100))
    dataframe['cltv_p_score'] = scaler.fit_transform(
        dataframe[[f'cltv_p_{m}_month']])

    # Three equal-sized segments by score: 'C' lowest, 'A' highest
    dataframe['cltv_p_segment'] = pd.qcut(dataframe['cltv_p_score'],
                                          3,
                                          labels=['C', 'B', 'A'])

    # Drop the raw RFM inputs from the result
    keep = ~dataframe.columns.isin(['recency', 'frequency', 'monetary'])
    return dataframe.loc[:, keep]

コード例 #3

0

ファイルを表示

ファイル: app.py プロジェクト: ZhengTzer/angela-ml-2018

def predictSpending(customerId):
    """
    Estimate the average transaction amount of one customer.

    Reads sample_transactions.csv, summarises it per customer, loads a
    previously saved Gamma-Gamma model from ggf.pkl and returns the
    prediction as a JSON response.

    customerId: index label of the customer in the per-customer summary.

    Returns flask.jsonify of
    {"success": bool, "result": {"customerId": ..., "y": float}}.
    "success" stays False when customerId is empty or when the customer is
    not among the customers with at least one repeat purchase.
    """
    # default payload, returned unchanged on every failure path
    response = {"success": False, "result": {"customerId": "", "y": 0.0}}

    # ensure the customer ID was properly uploaded to our endpoint
    if not customerId:
        return flask.jsonify(response)

    print("* get data")
    # (an earlier variant, left commented out in the original, fetched the
    # transactions as JSON from baseURL + "/api/transactions")
    transactions = pandas.read_csv("sample_transactions.csv")

    print("* prepare data")
    # prepare and shape the data; resulting columns, per customerId:
    #   frequency: number of repeat purchase transactions
    #   recency: time (in days) between first purchase and latest purchase
    #   T: time (in days) between first purchase and end of the period under study
    #   monetary_value: average transactions amount
    today = pandas.to_datetime(datetime.date.today())
    summaryData = summary_data_from_transaction_data(
        transactions,
        "customerId",
        "transactionDate",
        monetary_value_col="transactionAmount",
        observation_period_end=today)
    # keep only customers with at least one repeat transaction
    analysisData = summaryData[summaryData["frequency"] > 0]

    # get the stats of the particular customer; an unknown id (or one that
    # was filtered out above) is reported as a failure instead of letting
    # the KeyError escape from the endpoint
    try:
        customer = analysisData.loc[customerId]
    except KeyError:
        return flask.jsonify(response)

    # load model
    ggf_loaded = GammaGammaFitter()
    ggf_loaded.load_model('ggf.pkl')

    # estimate the average transaction amount
    predict = ggf_loaded.conditional_expected_average_profit(
        customer["frequency"], customer['monetary_value'])

    # add the input and predicted output to the return data; float() makes
    # sure a plain Python number is handed to the JSON encoder
    response = {
        "success": True,
        "result": {
            "customerId": customerId,
            "y": float(predict)
        }
    }

    # return the data dictionary as a JSON response
    return flask.jsonify(response)

コード例 #4

0

ファイルを表示

def gg_model(rfmmod, bgf, p, f):
    """
    Fit a Gamma-Gamma model and attach CLTV / expected-profit columns.

    rfmmod: per-customer frame with the columns frequency, recency, T and
        monetary_value. It is not modified.
    bgf: transaction model handed to customer_lifetime_value to predict the
        number of future transactions.
    p: penalizer coefficient for the Gamma-Gamma fit.
    f: value forwarded as the freq argument of customer_lifetime_value.

    Returns the rows with frequency > 0 and monetary_value > 0, extended
    with predicted_cltv and exp_profit, sorted by predicted_cltv in
    descending order and rounded to 3 decimals.
    """
    # Explicit copy: new columns are added below, and assigning onto a
    # filtered slice triggers pandas' SettingWithCopy warning.
    ret_cust = rfmmod[(rfmmod['frequency'] > 0)
                      & (rfmmod['monetary_value'] > 0)].copy()

    # Build the model
    ggf = GammaGammaFitter(penalizer_coef=p)
    ggf.fit(ret_cust['frequency'], ret_cust['monetary_value'])

    ret_cust['predicted_cltv'] = ggf.customer_lifetime_value(
        bgf,
        ret_cust['frequency'],
        ret_cust['recency'],
        ret_cust['T'],
        ret_cust['monetary_value'],
        time=12,  # months
        freq=f,
        discount_rate=0.01)
    ret_cust['exp_profit'] = ggf.conditional_expected_average_profit(
        ret_cust['frequency'], ret_cust['monetary_value'])

    return ret_cust.sort_values('predicted_cltv', ascending=False).round(3)

コード例 #5

0

ファイルを表示

# Show the figure(s) created earlier in the script.
plt.show()

# ##################################
# Establishing the GAMMA-GAMMA MODEL  ==> Expected Average Profit!
# ##################################

# Fit a Gamma-Gamma model (penalizer 0.01) on the per-customer purchase
# frequency and average monetary value held in rfm_cltv.
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(rfm_cltv["Frequency"], rfm_cltv["monetary_avg"])

# Recorded output of the fit:
#  <lifetimes.GammaGammaFitter: fitted with 4338 subjects, p: 3.54, q: 1.00, v: 3.25>


# =============================================
# The 10 customers with the highest expected average profit
# =============================================
# Bare expression: the result is only displayed when run interactively.
ggf.conditional_expected_average_profit(rfm_cltv["Frequency"],
                                        rfm_cltv["monetary_avg"]).sort_values(ascending=False).head(10)

# Recorded output:
# Customer ID
# 16000   1188.52359
# 16532   1123.69478
# 15749    970.89486
# 15098    853.82762
# 15195    824.67825
# 18102    634.92178
# 13270    593.64789
# 18080    568.96565
# 17291    554.42152
# 16698    530.14009
# dtype: float64

コード例 #6

0

ファイルを表示

ファイル: ARL_ADVANCED.py プロジェクト: hasan55-krmz/CRM-Analytics

def create_cltv_p(dataframe):
    """
    Build a per-customer CLTV prediction table from invoice lines.

    dataframe: transaction frame with the columns 'Customer ID',
        'InvoiceDate', 'Invoice' and 'TotalPrice'.

    Returns a frame indexed by customer with the columns recency_cltv_p, T,
    monetary_avg, recency_weekly_cltv_p, T_weekly, exp_sales_1_month,
    exp_sales_3_month, expected_average_profit, cltv_p (rescaled to 1-100)
    and cltv_p_segment ('C' lowest .. 'A' highest). Only customers with
    monetary_avg > 0 and frequency > 1 are kept.
    """
    today_date = dt.datetime(2011, 12, 11)

    # Build the RFM metrics plus tenure:
    #   days between first and last invoice, days between first invoice and
    #   today_date, number of distinct invoices, total spend.
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max()-date.min()).days,
                                                                lambda date: (today_date-date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})
    # Assigning a flat list replaces the two-level column index produced by
    # agg, so no separate droplevel step is needed (the original called
    # droplevel(0) but discarded its result).
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified monetary_avg: total spend divided by number of invoices
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and tenure for BG/NBD
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Sanity filters. Explicit copy: columns are assigned below, and doing
    # that on a filtered slice triggers pandas' SettingWithCopy warning.
    rfm = rfm[rfm["monetary_avg"] > 0]
    rfm = rfm[(rfm['frequency'] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly_cltv_p'],
            rfm['T_weekly'])

    # exp_sales_1_month (4 weeks)
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # exp_sales_3_month (12 weeks)
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])

    # 6-month cltv_p
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # Min-max rescale cltv_p into the range 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment: three equal-sized groups
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # Select the output columns (frequency is intentionally not among them)
    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm

コード例 #7

0

ファイルを表示

ファイル: CLV.py プロジェクト: lizzzfang/Customer-Analytics

# Show the figure(s) created earlier in the script.
plt.show()

### Gamma-Gamma model###
# Customers with at least one repeat purchase; only these are used to fit
# the Gamma-Gamma model below.
returning_customers_summary = data[data['frequency'] > 0]
# Bare expression: the correlation matrix is only displayed when run
# interactively; its value is not stored.
returning_customers_summary[[
    'monetary_value', 'frequency'
]].corr()  # Correlation between monetary value and the purchase frequency.

ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)

# estimate the average transaction value (evaluated on all of `data`, not
# only on the returning customers the model was fitted on)
print(
    ggf.conditional_expected_average_profit(data['frequency'],
                                            data['monetary_value']).head(10))

# refit the BG model to the summary_with_money_value dataset
# (bgf is created earlier in the script, outside this excerpt)
bgf.fit(data['frequency'], data['recency'], data['T'])

# 12-month customer lifetime value per customer
CLV_12M = ggf.customer_lifetime_value(
    bgf,  # the model to use to predict the number of future transactions
    data['frequency'],
    data['recency'],
    data['T'],
    data['monetary_value'],
    time=12,  # months
    discount_rate=0.01  # monthly discount rate ~ 12.7% annually
)

CLV_12M = pd.DataFrame({

コード例 #8

0

ファイルを表示

ファイル: clv.py プロジェクト: damienmarlier51/bankSim

    # Per customer: count the rows left after removing duplicate
    # (customer, step) pairs, minus one.
    # presumably the number of repeat-purchase steps -- confirm against the
    # part of the function above this excerpt
    frequency = df.drop_duplicates(subset=["customer", "step"],
                                   keep="first").groupby(["customer"]) \
                                                .count() - 1

    # Give each per-customer frame the column name expected by lifetimes.
    # recency, T and monetary are built earlier, outside this excerpt.
    recency.rename(columns={"step": "recency"}, inplace=True)
    frequency.rename(columns={"step": "frequency"}, inplace=True)
    T.rename(columns={"step": "T"}, inplace=True)
    monetary.rename(columns={"amount": "monetary_value"}, inplace=True)

    # Combine the per-customer frames side by side, aligned on their index.
    df_rfm = pd.concat([recency, T, monetary, frequency], axis=1)
    # Gamma-Gamma model without penalisation, fitted on every row of df_rfm.
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(frequency=df_rfm["frequency"],
            monetary_value=df_rfm["monetary_value"])

    # Expected average transaction value, computed one row at a time.
    df_rfm["expected_monetary_value"] = df_rfm.apply(
        lambda row: ggf.conditional_expected_average_profit(
            row["frequency"], row["monetary_value"]),
        axis=1)

    # BG/NBD model with penalizer 1.
    bgf = BetaGeoFitter(penalizer_coef=1)
    bgf.fit(frequency=df_rfm["frequency"],
            recency=df_rfm["recency"],
            T=df_rfm["T"])

    # Expected number of purchases over the next 180 time units
    # (the unit is that of the "step" column -- not visible here, TODO confirm).
    df_rfm[
        "pred_nb_purchases"] = bgf.conditional_expected_number_of_purchases_up_to_time(
            t=180,
            frequency=df_rfm["frequency"],
            recency=df_rfm["recency"],
            T=df_rfm["T"])

    df_rfm["pred_revenue"] = df_rfm.apply(

コード例 #9

0

ファイルを表示

def create_cltv_p(dataframe):
    """
    Compute 6-month CLTV predictions per customer with BG/NBD + Gamma-Gamma.

    dataframe: transaction frame with the columns 'Customer ID',
        'InvoiceDate', 'Invoice' and 'TotalPrice'.

    Returns a frame indexed by customer with the columns recency_cltv_p, T,
    monetary_avg, recency_weekly_cltv_p, T_weekly, exp_sales_1_month,
    exp_sales_3_month, expected_average_profit, cltv_p (rescaled to 1-100)
    and cltv_p_segment. Customers with monetary_avg <= 0 or frequency <= 1
    are excluded.
    """
    today_date = dt.datetime(2011, 12, 11)

    # Recency is computed per customer (last invoice minus first invoice),
    # tenure T is measured from the first invoice up to today_date.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days,
            lambda date: (today_date - date.min()).days
        ],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda TotalPrice: TotalPrice.sum()
    })

    # Flatten the two-level column index, then name the four aggregates.
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Simplified monetary_avg: total spend divided by number of invoices
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and weekly T for BG/NBD
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Check: monetary average must be positive
    rfm = rfm[rfm["monetary_avg"] > 0]

    # Frequency filter (for a sounder CLTV estimate). Explicit copy: columns
    # are assigned below, and doing that on a filtered slice triggers
    # pandas' SettingWithCopy warning.
    rfm = rfm[(rfm['frequency'] > 1)].copy()

    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # exp_sales_1_month (4 weeks)
    rfm["exp_sales_1_month"] = bgf.predict(4, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # exp_sales_3_month (12 weeks)
    rfm["exp_sales_3_month"] = bgf.predict(12, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month cltv_p
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Min-max rescale cltv_p into the range 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment: three equal-sized groups, 'C' lowest .. 'A' highest
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # Select the output columns (frequency is not among them)
    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]

    return rfm

コード例 #10

0

ファイルを表示

ファイル: Forecast_Deep.py プロジェクト: melinmmmm/comprehensive_crm

# The Gamma-Gamma model assumes that there is no relationship between the
# monetary value and the purchase frequency, so inspect their correlation.
# Bare expression: only displayed when run interactively (notebook export).
customer_detail[['avg_order_value', 'frequency']].corr()

# In[15]:

# The Gamma-Gamma model is used to estimate the average monetary value of
# customer transactions.
from lifetimes import GammaGammaFitter

gg = GammaGammaFitter(penalizer_coef=0.001)
gg.fit(customer_detail['frequency'],
       customer_detail['avg_order_value'],
       verbose=True)

# Show the expected average profit for the first 10 customers.
print(
    gg.conditional_expected_average_profit(
        customer_detail['frequency'],
        customer_detail['avg_order_value']).head(10))

# In[16]:

# Customer lifetime value over `t` periods with a zero discount rate.
# mbgnbd (the transaction model) and t are defined in earlier cells,
# outside this excerpt. astype(int) truncates the result to integers.
customer_detail['clv'] = gg.customer_lifetime_value(
    mbgnbd,
    customer_detail['frequency'],
    customer_detail['recency'],
    customer_detail['T'],
    customer_detail['avg_order_value'],
    time=t,
    discount_rate=0).astype(int)
# Preview of the inputs next to the computed clv (displayed interactively).
customer_detail[[
    'frequency', 'pred_90d_bgf', 'monetary', 'avg_order_value', 'clv'
]].head()

コード例 #11

0

ファイルを表示

def create_cltv_p(dataframe):
    """
    Predict 6-month customer lifetime value with BG/NBD and Gamma-Gamma.

    dataframe: transaction frame with the columns "Customer ID",
        "InvoiceDate", "Invoice" and "TotalPrice".

    Returns a frame indexed by customer holding recency_cltv_p, T,
    monetary_avg, recency_weekly_cltv_p, T_weekly, exp_sales_1_month,
    exp_sales_3_month, expected_average_profit, cltv_p (rescaled to 1-100)
    and cltv_p_segment. Only customers with monetary_avg > 0 and
    frequency > 1 are included.
    """
    today_date = dt.datetime(2011, 12, 11)

    # recency is user-specific
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,       # "recency_cltv_p"
                                                                lambda date: (today_date - date.min()).days],      # "T"
                                                'Invoice': lambda num: num.nunique(),                              # "frequency"
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})                # "monetary"
    # Flatten the two-level column index, then name the four aggregates.
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ["recency_cltv_p", "T", "frequency", "monetary"]

    # Simplified monetary_avg (the Gamma-Gamma model works on the average
    # value per transaction)
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Weekly recency and weekly T for the BG/NBD model
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Monetary average must be positive
    rfm = rfm[rfm["monetary_avg"] > 0]

    # Frequency filter. Explicit copy: columns are assigned below, and
    # doing that on a filtered slice triggers pandas' SettingWithCopy
    # warning.
    rfm = rfm[(rfm["frequency"] > 1)].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)  # converting it to integer just in case

    # Establishing the BG/NBD model
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm["frequency"],
            rfm["recency_weekly_cltv_p"],
            rfm["T_weekly"])

    # exp_sales_1_month (4 weeks)
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])
    # exp_sales_3_month (12 weeks)
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])

    # Establishing the Gamma-Gamma model => expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm["frequency"], rfm["monetary_avg"])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm["frequency"],
                                                                             rfm["monetary_avg"])
    # CLTV prediction for 6 months
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm["frequency"],
                                       rfm["recency_weekly_cltv_p"],
                                       rfm["T_weekly"],
                                       rfm["monetary_avg"],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Min-max rescale cltv_p into the range 1-100
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment: three equal-sized groups, "C" lowest .. "A" highest
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # Select the output columns (frequency is not among them)
    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm

コード例 #12

0

ファイルを表示

ファイル: AI.py プロジェクト: 0424048/AICRM

def predicted_purchase_time(account, timesteap):
    """
    Predict, per customer, the purchase probability and the average
    transaction amount from an account's order export.

    account: name used to locate 'AIexcel/<account>.csv' (orders) and
        'AIexcel/<account>_ga.csv' (uuid / level / next_time).
    timesteap: currently unused -- the prediction horizon below is
        hard-coded to 30. NOTE(review): presumably this argument was meant
        to be that horizon; confirm before wiring it in.

    Returns a DataFrame with three columns (customer identifier, predicted
    purchases capped at 1 and formatted as a percentage string, expected
    average transaction amount) for the customers that have no next_time
    on or after `today`.

    Relies on names defined elsewhere in the module: today, ga_toLevel,
    flag_df and time_df.
    """
    # Per a commented-out rename in the original, the positional column
    # names map to the export's headers as: name = recipient name,
    # uuid = recipient mobile, invoiceDate = payment date,
    # produce_name = product name, Total = product total price.
    df = pd.read_csv(
        'AIexcel/' + account + '.csv',
        names=['name', 'uuid', 'invoiceDate', 'produce_name', 'Total'],
        sep=',',
        encoding='utf8',
        low_memory=False)
    df_ga = pd.read_csv('AIexcel/' + account + '_ga.csv',
                        names=['uuid', 'level', 'next_time'],
                        sep=',',
                        encoding='utf8',
                        low_memory=False)
    # Keep the raw level values (first row skipped), then drop row 0.
    # presumably row 0 is the file's own header line -- confirm
    df_UserLabel = df_ga['level'][1:].tolist()
    df_ga.drop([0], inplace=True)
    if 'level' in df_ga:
        df_ga['level'] = df_ga.apply(ga_toLevel, axis=1)

    # Boolean-mask row selection. `.ix` was removed in pandas 1.0; `.loc`
    # is the equivalent for a boolean mask.
    df = df.loc[df.invoiceDate.str.len() == 19]
    df = df.loc[df['name'].str.len() <= 10]
    # take three columns
    df1 = df[['uuid', 'invoiceDate', 'Total']]
    # NOTE(review): the original comment said "drop price == 1", but the
    # comparison is made on invoiceDate -- confirm which column was meant.
    df1_ = df1.drop(df1[df1['invoiceDate'] == 1].index)
    # drop rows with missing values
    df_drop = df1_.dropna()
    dataframe = df_drop
    dataframe['invoiceDate'] = pd.to_datetime(dataframe['invoiceDate']).dt.date
    dataframe.Total = dataframe.Total.astype(float)

    # Per-customer summaries: without and with the monetary value column.
    data = summary_data_from_transaction_data(
        dataframe,
        'uuid',
        'invoiceDate',
        observation_period_end=dataframe.invoiceDate.max())
    data2 = summary_data_from_transaction_data(
        dataframe,
        'uuid',
        'invoiceDate',
        monetary_value_col='Total',
        observation_period_end=dataframe.invoiceDate.max())

    # BG/NBD: expected number of purchases within the next 30 periods.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(data['frequency'], data['recency'], data['T'])
    purchase_time = data
    purchase_time[
        'predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
            30, data['frequency'], data['recency'], data['T'])
    predicted_purchases_df = purchase_time[[
        'predicted_purchases'
    ]].sort_values(by='predicted_purchases', ascending=False)
    # Average gap between purchases (aligned on the customer index).
    predicted_purchases_df['cycle'] = data['recency'] / data['frequency']

    # Gamma-Gamma: expected average transaction amount for returning
    # customers with a non-zero monetary value.
    returning_customers_summary = data2[(data2['frequency'] > 0)
                                        & (data2['monetary_value'] != 0)]
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(returning_customers_summary['frequency'],
            returning_customers_summary['monetary_value'])
    income = ggf.conditional_expected_average_profit(
        returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value']).to_frame()
    income.columns = ['predicted_price']
    predicted_purchases_df = predicted_purchases_df.merge(income,
                                                          on=['uuid'],
                                                          how='left')
    predicted_purchases_df.reset_index(inplace=True)

    # Cap predicted purchases at 1 and render them as a percentage string.
    mask = predicted_purchases_df.predicted_purchases > 1
    predicted_purchases_df.loc[mask, 'predicted_purchases'] = 1
    predicted_purchases_df['predicted_purchases'] = predicted_purchases_df[
        'predicted_purchases'].astype(float)
    predicted_purchases_df = predicted_purchases_df.sort_values(
        by=['predicted_purchases'], ascending=False)
    predicted_purchases_df['predicted_purchases'] = predicted_purchases_df[
        'predicted_purchases'].apply(lambda x: format(x, '.2%'))

    # Attach level / next_time from the _ga file.
    predicted_purchases_df = predicted_purchases_df.merge(df_ga,
                                                          left_on="uuid",
                                                          right_on="uuid",
                                                          how='left')

    predicted_purchases_df['level'] = predicted_purchases_df.apply(flag_df,
                                                                   axis=1)
    predicted_purchases_df.replace(np.nan, 0, inplace=True)
    predicted_purchases_df.replace(np.inf, 0, inplace=True)
    if 'next_time' not in predicted_purchases_df.columns:
        predicted_purchases_df['next_time'] = np.nan
    predicted_purchases_df['next_time'] = pd.to_datetime(
        predicted_purchases_df['next_time'])

    # Split customers by whether they already have a next_time on or after
    # `today`. The membership mask is computed once and used for both
    # halves. The original also concatenated the two halves into an unused
    # frame via DataFrame.append (removed in pandas 2.0); that dead
    # statement is dropped.
    scheduled_uuids = predicted_purchases_df[
        predicted_purchases_df.next_time >= today].uuid.astype(str).tolist()
    is_scheduled = predicted_purchases_df.uuid.isin(scheduled_uuids)
    # Explicit copy: columns are assigned on this subset below.
    predicted_purchases_df_N = predicted_purchases_df[~is_scheduled].copy()
    predicted_purchases_df_off = predicted_purchases_df[is_scheduled]

    # For customers without an upcoming next_time: scale the cycle by the
    # level and derive a new next_time from `today`.
    predicted_purchases_df_N['cycle'] = (
        predicted_purchases_df_N['cycle'] *
        predicted_purchases_df_N['level']).round(0).astype(int)
    predicted_purchases_df_N[
        'next_time'] = today + predicted_purchases_df_N.apply(time_df, axis=1)
    predicted_purchases_df_NQ = predicted_purchases_df_N.dropna()
    predicted_purchases_df_off = predicted_purchases_df_off.drop(
        columns=['predicted_purchases', 'cycle', 'predicted_price'])
    predicted_purchases_df_NQ = predicted_purchases_df_NQ.drop(
        columns=['predicted_purchases', 'cycle', 'predicted_price'])

    # Rebuild the uuid / level / next_time table. After the two merges the
    # frame carries level_x/next_time_x, level_y/next_time_y and
    # level/next_time.
    # NOTE(review): `res` is only consumed by a to_csv call that is
    # commented out in the original, so this section currently has no
    # effect on the return value. It is kept unchanged.
    df_ga = df_ga.merge(predicted_purchases_df_off,
                        left_on="uuid",
                        right_on="uuid",
                        how='left')
    df_ga = df_ga.merge(predicted_purchases_df_NQ,
                        left_on="uuid",
                        right_on="uuid",
                        how='left')
    notNull_df = df_ga[
        df_ga['level'].notnull() & df_ga['next_time'].notnull()].drop(
            columns=['level_y', 'next_time_y', 'next_time_x', 'level_x'])
    notNull_df2 = df_ga[
        df_ga['level_y'].notnull() & df_ga['next_time_y'].notnull()].drop(
            columns=['level', 'next_time', 'next_time_x', 'level_x'])
    notNull_df2.columns = ['uuid', 'level', 'next_time']
    res = pd.concat([notNull_df, notNull_df2], axis=0, ignore_index=True)
    res.rename(columns={u'uuid': u'收件人手機'}, inplace=True)
    res['UserLabel'] = pd.Series(df_UserLabel)
    res = res[[u'收件人手機', u'UserLabel', u'next_time']]
    # res.to_csv('AIexcel/' + account + '_ga.csv',index=False,encoding='utf8')

    # Final output: uuid, predicted_purchases and predicted_price, relabelled
    # with the export's headers (roughly: recipient mobile, customer purchase
    # probability, average transaction amount).
    predicted_purchases_df_N = predicted_purchases_df_N.drop(
        columns=['level', 'cycle', 'next_time'])
    predicted_purchases_df_N.columns = [u'收件人手機', u'顧客購買機率', u'平均交易金額']

    return predicted_purchases_df_N


# print(predicted_purchase_time(account,30)[:30])

コード例 #13

0

ファイルを表示

    # Save the figure drawn just above this excerpt, then close it.
    plt.savefig('recencymatrix.png')
    plt.close()
    # Plot the probability-alive matrix of the fitted transaction model and
    # save it as well.
    plot_probability_alive_matrix(bgf)
    #pylab.show()
    plt.savefig('probability.png')
    # Pick the 21st row of the summary as an example customer.
    individual = summary.iloc[20]
    #print(individual)

    # Prediction horizon; the message below calls it "next week".
    # presumably the summary is expressed in days -- TODO confirm
    t = 7
    print("\n\n\nselected customer probability in next week")
    # Expected number of purchases of the example customer within t.
    print(
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, individual['frequency'], individual['recency'],
            individual['T']))
    # Same prediction for every customer.
    summary['predicted_purchases'] = (
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']))
    print(summary.head())

    # Gamma-Gamma model fitted on customers with at least one repeat purchase.
    summary2 = summary[summary['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary2['frequency'], summary2['monetary_value'])
    print(ggf)
    print("\n\n\nSelected customer clv")
    # NOTE(review): despite the "clv" wording, the value printed and stored
    # below is the conditional expected average profit per transaction.
    print(
        ggf.conditional_expected_average_profit(individual['frequency'],
                                                individual['monetary_value']))
    # Computed from summary2 only; assuming the result is indexed like
    # summary2, rows of summary with frequency == 0 end up as NaN -- confirm.
    summary['clv'] = (ggf.conditional_expected_average_profit(
        summary2['frequency'], summary2['monetary_value']))
    print(summary.head())

コード例 #14

0

ファイルを表示

################################################################
# Evaluating the prediction results
################################################################

# Plot the period-transactions diagnostic of the fitted transaction model
# (bgf is fitted earlier, outside this excerpt).
plot_period_transactions(bgf)
plt.show()

##############################################################
# 3. Building the GAMMA-GAMMA model
##############################################################

ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(rfm['frequency'], rfm['monetary_avg'])

# Expected average profit for the first 10 customers
# (bare expression: only displayed when run interactively).
ggf.conditional_expected_average_profit(rfm['frequency'],
                                        rfm['monetary_avg']).head(10)

# The 10 customers with the highest expected average profit.
ggf.conditional_expected_average_profit(
    rfm['frequency'],
    rfm['monetary_avg']).sort_values(ascending=False).head(10)

# Store the estimate as a column so it can be sorted and reused.
rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
    rfm['frequency'], rfm['monetary_avg'])

rfm.sort_values("expected_average_profit", ascending=False).head(20)

##############################################################
# 4. Computing CLTV with the BG-NBD and GG models.
##############################################################

cltv = ggf.customer_lifetime_value(

コード例 #15

0

ファイルを表示

ファイル: ARL.py プロジェクト: berk-123/ARL-Assocation-Rule-Learning

def create_cltv_p(dataframe):
    """Build a per-customer CLTV prediction table from invoice line items.

    Aggregates ``dataframe`` by ``'Customer ID'`` into recency / T /
    frequency / average-spend features, fits a BG/NBD model and a
    Gamma-Gamma model on them, and derives expected purchase counts, the
    expected average profit and a 6-month CLTV.  The CLTV is min-max scaled
    to the range [1, 100] and bucketed into tertiles labelled "C" (lowest),
    "B" and "A" (highest).

    Only customers with ``monetary_avg > 0`` and ``frequency > 1`` are kept.
    The analysis date is fixed at 2011-12-11.

    Args:
        dataframe: transaction-level frame with the columns
            ``'Customer ID'``, ``'InvoiceDate'``, ``'Invoice'`` and
            ``'TotalPrice'``.  ``'InvoiceDate'`` values must support
            subtraction from a ``datetime`` (``.days`` of the result is used).

    Returns:
        DataFrame indexed by ``'Customer ID'`` with the columns
        ``recency_cltv_p``, ``T``, ``monetary_avg``,
        ``recency_weekly_cltv_p``, ``T_weekly``, ``exp_sales_1_month``,
        ``exp_sales_3_month``, ``expected_average_profit``, ``cltv_p`` and
        ``cltv_p_segment``.
    """
    today_date = dt.datetime(2011, 12, 11)

    # Per customer: days between first and last invoice (recency), days
    # between first invoice and the analysis date (T), number of distinct
    # invoices (frequency) and total spend (monetary).
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                                                                lambda date: (today_date - date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})

    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # Average spend per invoice.
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # The models below are fed weekly time units.
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Keep customers with positive average spend and more than one invoice.
    # The explicit copy makes `rfm` an independent frame, so the column
    # assignments below do not write into a slice of the original
    # (avoids SettingWithCopyWarning / copy-on-write surprises).
    rfm = rfm[(rfm["monetary_avg"] > 0) & (rfm["frequency"] > 1)].copy()

    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD: models the number of future purchases.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly_cltv_p'],
            rfm['T_weekly'])

    # Expected purchases over the next 4 and 12 weekly periods.
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # Gamma-Gamma: models the expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])

    # 6-month CLTV; freq="W" tells lifetimes that T is expressed in weeks.
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Rescale CLTV to [1, 100].
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tertile segments: "C" is the lowest third, "A" the highest.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # Note: 'frequency' is deliberately not part of the returned columns,
    # matching the original selection.
    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm

コード例 #16

0

ファイルを表示

def create_cltv_p(dataframe):
    """Return a per-customer table of purchase forecasts and scaled CLTV.

    The input is grouped by ``'Customer ID'``; BG/NBD and Gamma-Gamma
    models (both with ``penalizer_coef=0.01``) are fitted on the resulting
    weekly recency / T, frequency and average-spend features.  The 6-month
    CLTV is rescaled to [1, 100] and cut into tertiles "C" < "B" < "A".

    Customers with non-positive average spend or with at most one distinct
    invoice are excluded.  The analysis date is fixed at 2011-12-11.

    Args:
        dataframe: frame with ``'Customer ID'``, ``'InvoiceDate'``,
            ``'Invoice'`` and ``'TotalPrice'`` columns.

    Returns:
        DataFrame indexed by customer with ``recency_cltv_p``, ``T``,
        ``monetary_avg``, ``recency_weekly_cltv_p``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` and ``cltv_p_segment``.
    """
    today_date = dt.datetime(2011, 12, 11)

    # recency = last - first invoice (days); T = analysis date - first
    # invoice (days); frequency = distinct invoices; monetary = total spend.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days, lambda date:
            (today_date - date.min()).days
        ],
        'Invoice':
        lambda num: num.nunique(),
        'TotalPrice':
        lambda price: price.sum()
    })
    rfm.columns = rfm.columns.droplevel(0)

    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']
    # Average spend per invoice.
    rfm['monetary'] = rfm['monetary'] / rfm['frequency']

    rfm.rename(columns={'monetary': 'monetary_avg'}, inplace=True)

    # Weekly time units for the models.
    rfm["recency_weekly_cltv_p"] = rfm['recency_cltv_p'] / 7
    rfm['T_weekly'] = rfm['T'] / 7

    # Filter once and take an explicit copy so that the column assignments
    # below operate on an independent frame rather than on a slice
    # (avoids SettingWithCopyWarning / copy-on-write surprises).
    keep = (rfm['monetary_avg'] > 0) & (rfm['frequency'] > 1)
    rfm = rfm[keep].copy()
    rfm['frequency'] = rfm['frequency'].astype(int)

    # BG/NBD: expected number of purchases.
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # Forecast horizons of 4 and 12 weekly periods.
    rfm["exp_sales_1_month"] = bgf.predict(4, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    # 6-month CLTV; freq='W' declares that T is measured in weeks.
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq='W',
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Rescale CLTV to [1, 100].
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tertiles: "C" lowest, "A" highest.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]

    return rfm

コード例 #17

0

ファイルを表示

ファイル: CLV.py プロジェクト: vaskopozharski-00306661T/IDS201---Assessment-3

from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter

# BG/NBD model of purchase counts, fitted on every customer in `data`.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(data['frequency'], data['recency'], data['T'])

# Forecast horizon, expressed in the same time unit as data['T']
# (NOTE(review): unit not visible here; confirm 10000 is intended).
future_horizon = 10000
data['predicted_purchases'] = bgf.predict(future_horizon, data['frequency'],
                                          data['recency'], data['T'])
data.head()

# The Gamma-Gamma spend model is fitted on returning customers only.
returning_customers_summary = data[data['frequency'] > 0]

ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])

# A single company-wide expected spend per transaction (mean over all
# customers in `data`).
transaction_spend = ggf.conditional_expected_average_profit(
    data['frequency'], data['monetary_value']).mean()
print(transaction_spend)

# Attach the purchase forecast to the customer table, replacing any
# previously computed 'clv' column.
customers_pm = customers_ac.join(data['predicted_purchases'],
                                 on='id',
                                 how='left').drop(columns='clv')

# Vectorized product instead of a row-wise apply(axis=1): same values
# (customers without a forecast stay NaN), without a Python-level call per
# row.
customers_pm['clv'] = customers_pm['predicted_purchases'] * transaction_spend
customers_pm.tail()

コード例 #18

0

ファイルを表示

ファイル: Indiclv.py プロジェクト: meghashyam97/Transaction-Analysis

    #plot_frequency_recency_matrix(bgf)
    #pylab.show()
    #plot_probability_alive_matrix(bgf)
    #pylab.show()
    index = 0
    val = (sys.argv[1])
    for row in summary:
        if row[0] == val:
            break
        else:
            index += 1
    individual = summary.iloc[index]
    #print(individual)

    t = 7
    #print("\n\n\nselected customer probability in next week")
    #print(bgf.conditional_expected_number_of_purchases_up_to_time(t,individual['frequency'],individual['recency'],individual['T']))
    #summary['predicted_purchases']=(bgf.conditional_expected_number_of_purchases_up_to_time(t, summary['frequency'], summary['recency'], summary['T']))
    #print (summary.head())

    summary2 = summary[summary['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary2['frequency'], summary2['monetary_value'])
    #print (ggf)
    #print("\n\n\nSelected customer clv")
    print(
        ggf.conditional_expected_average_profit(individual['frequency'],
                                                individual['monetary_value']))
    #summary['clv']=(ggf.conditional_expected_average_profit(summary2['frequency'],summary2['monetary_value']))
    #print(summary.head())

コード例 #19

0

ファイルを表示

def create_cltv_p(dataframe):
    """Build a per-customer CLTV prediction table from invoice line items.

    Groups ``dataframe`` by ``'Customer ID'``, fits BG/NBD and Gamma-Gamma
    models (``penalizer_coef=0.001``) on weekly recency / T, frequency and
    an average-price feature, and returns expected purchase counts, the
    expected average profit and a 6-month CLTV rescaled to [1, 100] with
    tertile labels "C" (lowest), "B", "A" (highest).

    Recency is the number of days between a customer's first and last
    invoice, which is the quantity the BG/NBD model expects.  (It was
    previously computed as days since the last invoice, i.e. the RFM
    definition, which mis-specifies the model input.)

    Only customers with ``monetary_avg > 0`` are kept.  The analysis date is
    fixed at 2011-12-11.

    Args:
        dataframe: frame with ``'Customer ID'``, ``'InvoiceDate'``,
            ``'Invoice'`` and ``'TotalPrice'`` columns.  ``'Customer ID'``
            values must be convertible to ``int``.

    Returns:
        DataFrame indexed by integer customer id with the columns
        ``monetary_avg``, ``T``, ``recency_weekly``, ``T_weekly``,
        ``exp_sales_1_month``, ``exp_sales_3_month``,
        ``expected_average_profit``, ``cltv_p`` and ``cltv_p_segment``.
    """
    today_date = dt.datetime(2011, 12, 11)
    # recency = last invoice - first invoice (days), T = analysis date -
    # first invoice (days), frequency = distinct invoices, monetary = spend.
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                                                                lambda date: (today_date - date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})

    rfm.columns = rfm.columns.droplevel(0)

    rfm.columns = ['recency', 'T', 'frequency', 'monetary']

    # monetary_avg: mean of TotalPrice within each invoice, then the mean of
    # those per-invoice means for each customer.
    temp_df = dataframe.groupby(["Customer ID", "Invoice"]).agg({"TotalPrice": ["mean"]})
    temp_df = temp_df.reset_index()
    temp_df.columns = temp_df.columns.droplevel(0)
    temp_df.columns = ["Customer ID", "Invoice", "total_price_mean"]
    temp_df2 = temp_df.groupby(["Customer ID"], as_index=False).agg({"total_price_mean": ["mean"]})
    temp_df2.columns = temp_df2.columns.droplevel(0)
    temp_df2.columns = ["Customer ID", "monetary_avg"]

    rfm = rfm.merge(temp_df2, how="left", on="Customer ID")
    rfm.set_index("Customer ID", inplace=True)
    rfm.index = rfm.index.astype(int)

    # Weekly time units for the BG/NBD model.
    rfm["recency_weekly"] = rfm["recency"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # Keep customers with positive average spend.  The explicit copy makes
    # `rfm` independent of the frame it was sliced from, so the assignments
    # below do not trigger SettingWithCopyWarning.
    rfm = rfm[rfm["monetary_avg"] > 0].copy()
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BG/NBD: expected number of purchases.
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly'],
            rfm['T_weekly'])

    # Expected purchases over the next 4 and 12 weekly periods.
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly'],
                                           rfm['T_weekly'])

    # Gamma-Gamma: expected average profit per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])
    # 6-month CLTV; freq="W" declares that T is measured in weeks.
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Rescale CLTV to [1, 100].
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # Tertiles: "C" lowest, "A" highest.
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["monetary_avg", "T", "recency_weekly", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm

コード例 #20

0

ファイルを表示

ファイル: Main.py プロジェクト: colllz/CLV

# Customers with at least one repeat purchase.
returning_customers_summary = modeldata[modeldata['frequency']>0]

print(len(returning_customers_summary))
returning_customers_summary.shape

from lifetimes import GammaGammaFitter

# Drop non-positive average spend BEFORE fitting: the Gamma-Gamma model is
# only defined for positive monetary values, so filtering after the fit (as
# was done previously) cannot protect the fit itself.  The explicit copy
# lets new columns be added below without SettingWithCopyWarning.
returning_customers_summary = returning_customers_summary[
    returning_customers_summary['monetary_value'] > 0].copy()

ggf = GammaGammaFitter(penalizer_coef = 0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])

print(ggf)

# Expected average sale value per customer according to the fitted model.
returning_customers_summary['predicted_avg_sales'] = ggf.conditional_expected_average_profit(
    returning_customers_summary['frequency'],
    returning_customers_summary['monetary_value'])

# Sanity check: compare the expected average value with the actual average
# value in the data to make sure the predictions are plausible.
print(f"Expected Average sales: {returning_customers_summary['predicted_avg_sales'].mean()}")
print(f"Actual Average sales: {returning_customers_summary['monetary_value'].mean()}")
# The values seem to be fine

#calculating CLV for 1 month
returning_customers_summary['Predicted_CLV'] = ggf.customer_lifetime_value(bgf,
                                                                           returning_customers_summary['frequency'],
                                                                           returning_customers_summary['recency'],
                                                                           returning_customers_summary['T'],
                                                                           returning_customers_summary['monetary_value'],
                                                                           time=1, # lifetime in months

コード例 #21

0

ファイルを表示

ファイル: CLTV_predictions.py プロジェクト: oozanguner/Customer_Lifetime_Value_Prediction

# Total expected number of purchases across all customers over the next
# 4 periods (the inputs are the *_weekly columns, so presumably 4 weeks,
# i.e. about one month).
bgf.conditional_expected_number_of_purchases_up_to_time(
    4,
    cltv["frequency"],
    cltv["recency_weekly"],
    cltv["T_weekly"],
).sort_values(ascending=False).sum()

plot_period_transactions(bgf)
plt.show()

######
# GAMMA-GAMMA
#####
# Fit the spend model on purchase frequency and average spend.
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(cltv["frequency"], cltv["monetary_avg"])

# Store the per-customer expected average profit, then preview the top rows.
cltv["expected_average_profit"] = ggf.conditional_expected_average_profit(
    cltv["frequency"],
    cltv["monetary_avg"],
)

cltv.sort_values(by="expected_average_profit", ascending=False).head()

###########
# 4. CLTV calculation with BG-NBD and GG models
###########
# time=6 with freq="W": T is supplied in weekly units.
cltv["cltv_six_months"] = ggf.customer_lifetime_value(
    bgf,
    cltv["frequency"],
    cltv["recency_weekly"],
    cltv["T_weekly"],
    cltv["monetary_avg"],
    time=6,
    discount_rate=0.01,
    freq="W",
)

コード例 #22

0

ファイルを表示

ファイル: tools.py プロジェクト: dgmiller/portfolio

class CLV(object):
    """
    Customer-lifetime-value estimation built on the lifetimes package.

    INPUT
        pmg_num (int) the product market group number, default = 1
        outfile1 (str) the filename indicating where to store the raw data before analysis, default = '../data/clvtrainingset01.csv'
        outfile2 (str) the filename containing the results, default = '../data/clv01.csv'
        date_range (list) the start date and end date of the years to analyze, default = ['2008-09-01','2016-09-01']
    attributes other than those listed above
        self.data (DataFrame) a pandas DataFrame object of the data to be used for analysis
        self.bgf (from lifetimes) a statistical model object from the lifetimes package
        self.ggf (from lifetimes) a statistical model object from the lifetimes package
        self.results (DataFrame) a pandas DataFrame object of the results of analysis
    """
    def __init__(self,pmg_num=1,outfile1='../data/clvtrainingset01.csv',outfile2='../data/clv01.csv',date_range=['2008-09-01','2016-09-01']):
        self.pmg_num = pmg_num
        # outfile1 stores a clean version of the raw data used for analysis; this is important for reproducibility
        self.outfile1 = outfile1
        # outfile2 stores the clv estimation results
        self.outfile2 = outfile2
        # Store a private copy: the default list object is shared by every
        # call, so keeping a reference to it would let a mutation on one
        # instance leak into all later instances.
        self.date_range = list(date_range)
        self.data = None
        self.bgf = None
        self.ggf = None
        self.results = None

    def get_data_from_server(self,cmd=None):
        """
        Gets data from sales_db and stores the query results in self.data

        The raw rows are also written to self.outfile1 as CSV.

        INPUT
            cmd (str) the default sql query is below

            The default query has been replaced. The original query was an 8 line select command.
        """
        # server name
        dsn = "THE SERVER NAME"
        cnxn_name = "DSN=%s" % dsn
        connection = odbc.connect(cnxn_name) # use to access the database
        # list to store the query data
        transaction_data = []
        try:
            c = connection.cursor() # generate cursor object

            # Grab transaction data from Postgres
            if not cmd:
                # NOTE(review): the placeholder text contains no format
                # specifiers, so this %-formatting raises TypeError until the
                # real default query is restored.
                cmd = """SQL DEFAULT COMMAND GOES HERE""" % (self.pmg_num,self.date_range[0],self.date_range[1])

            c.execute(cmd) # execute the sql command

            # create a dictionary to convert customer ids to name
            to_name = dict(np.genfromtxt('../data/names.csv',dtype=str,delimiter='\t'))

            for row in c:
                cust, rsv_date, sales = row # pull data from each row of the query data
                cust_id = str(int(cust))
                name = to_name[cust_id]
                # keep the row only when the project helper `use` accepts the customer name
                if use(name):
                    sales_float = float(sales) # convert to float; represents the transaction amount
                    transaction_data.append({"id":cust, "date":rsv_date, "sales":sales_float}) # add dictionary of data to list
        finally:
            # Always release the database connection, even if the query or
            # the row processing fails.
            connection.close()

        # convert to dataframe
        df = pd.DataFrame(transaction_data, columns=['id', 'date', 'sales'])
        # store results
        df.to_csv(self.outfile1,index=False)
        # IMPORTANT: use correct observation_period_end date
        self.data = summary_data_from_transaction_data(df, 'id', 'date', 'sales', observation_period_end=self.date_range[1], freq='M')

    def get_data_from_file(self,filename,**kwargs):
        """
        Loads transactions from a CSV file and stores the summary in self.data
        INPUT
            filename (str) path of a CSV file with 'id', 'date' and 'sales' columns
            **kwargs extra keyword arguments forwarded to pandas.read_csv
        """
        df = pd.read_csv(filename,**kwargs)
        self.data = summary_data_from_transaction_data(df, 'id', 'date', 'sales', observation_period_end=self.date_range[1], freq='M')

    def fit(self,months=96):
        """
        Computes CLV estimates for the next n months and stores results in self.results

        Requires self.data to have been populated by get_data_from_server or
        get_data_from_file. The results, sorted by descending CLV estimate,
        are also written to self.outfile2 as CSV.

        INPUT
            months (int) number of months to predict, default = 96 (8 years)
        """
        ### PREDICT NUMBER OF PURCHASES
        self.bgf = BetaGeoFitter() # see lifetimes module documentation for details
        self.bgf.fit(self.data['frequency'], self.data['recency'], self.data['T'])
        # 8 years = 96 months
        self.data['predicted_purchases'] = self.bgf.conditional_expected_number_of_purchases_up_to_time(
                months,
                self.data['frequency'],
                self.data['recency'],
                self.data['T'])

        ### PREDICT FUTURE PURCHASE AMOUNT
        # NOTE(review): the spend model is fitted on every customer in
        # self.data, including any with zero repeat purchases; confirm this
        # is intended for the lifetimes version in use.
        self.ggf = GammaGammaFitter(penalizer_coef = 0)
        self.ggf.fit(self.data['frequency'], self.data['monetary_value'])
        # predict next transaction
        self.data['predicted_trans_profit'] = self.ggf.conditional_expected_average_profit(
                frequency = self.data['frequency'],
                monetary_value = self.data['monetary_value'])

        ### ESTIMATE CLV
        # undiscounted estimate: expected profit per transaction x expected number of purchases
        self.data['clv_estimation'] = self.data['predicted_trans_profit'] * self.data['predicted_purchases']
        self.data['prob_alive'] = self.bgf.conditional_probability_alive(
                self.data['frequency'],
                self.data['recency'],
                self.data['T'])
        self.results = self.data.sort_values(by='clv_estimation',ascending=False)
        # store results
        # NOTE(review): index=False omits the frame's index from the CSV; if
        # the index carries the customer id it will not be written - confirm.
        self.results.to_csv(self.outfile2,index=False)

    def plot_matrices(self):
        """
        plots three matrices:
            probability alive matrix: displays the probability that a customer is active
            frequency recency matrix: displays frequency and recency with color corresponding
                                        to monetary value
            period transactions: displays predicted and actual transaction values over time
            (check documentation in lifetimes for more details)
        """
        plot_probability_alive_matrix(self.bgf,cmap='viridis')
        plot_frequency_recency_matrix(self.bgf,cmap='viridis')
        plot_period_transactions(self.bgf)

コード例 #23

0

ファイルを表示

ファイル: online_retail_cltv.py プロジェクト: simgeerek/CLTV

##############################################################
# GAMMA GAMMA MODEL
##############################################################

# To be sure the Gamma-Gamma model is applicable, check whether frequency
# and monetary value are correlated. (?)
combined_data[['monetary_value_cal', 'frequency_cal']].corr()
# Correlation is low, continue

# Model fit
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(combined_data['frequency_cal'], combined_data['monetary_value_cal'])

# Prediction
monetary_pred = ggf.conditional_expected_average_profit(
    combined_data['frequency_cal'], combined_data['monetary_value_cal'])

# Side-by-side frame of actual (holdout) and predicted values for inspection
df_comp_m = pd.DataFrame({
    "ActualMonetary": combined_data['monetary_value_holdout'],
    "Predicted": monetary_pred,
})
df_comp_m.head(20)

# Summary statistics comparing the holdout spend with the prediction
actual_mean = combined_data["monetary_value_holdout"].mean()
predicted_mean = monetary_pred.mean()

print("Expected Average Sales: %s" % predicted_mean)
print("Actual Average Sales: %s" % actual_mean)
print("Difference: %s" % (actual_mean - predicted_mean))
print(
    "Mean Squared Error: %s" %
    mean_squared_error(combined_data["monetary_value_holdout"], monetary_pred))

コード例 #24

0

ファイルを表示

# Expected number of purchases per customer over the next t periods
# (t uses the same time unit as df["T"]).
t = 1
df["predicted_purchases"] = bgf.conditional_expected_number_of_purchases_up_to_time(
    t, df["FREQUENCY"], df["RECENCY"], df["T"])
# Preview the 10 customers with the highest predicted purchase count.
df.sort_values(by="predicted_purchases").tail(10)

# ==========================================================================
# Gamma-Gamma Model
# Model assumes that there is no relationship between the monetary value and the purchase frequency
# ==========================================================================

# Check that assumption: correlation between monetary value and frequency.
df[["MONETARY_VALUE", "FREQUENCY"]].corr()

# NOTE(review): the model is fitted on all rows of df, while the average
# printed below is restricted to FREQUENCY > 0 - confirm whether df still
# contains customers without repeat purchases at this point.
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(df["FREQUENCY"], df["MONETARY_VALUE"])

ggf.conditional_expected_average_profit(df["FREQUENCY"],
                                        df["MONETARY_VALUE"]).head(10)

# Compare the model's mean expected profit with the observed mean spend of
# customers that have FREQUENCY > 0.
print("Expected conditional average profit: %s, Average profit: %s" %
      (ggf.conditional_expected_average_profit(df["FREQUENCY"],
                                               df["MONETARY_VALUE"]).mean(),
       df[df["FREQUENCY"] > 0]["MONETARY_VALUE"].mean()))

# Fit the BG/NBD model on the same frame before it is used for the CLV
# computation that follows.
bgf.fit(df["FREQUENCY"], df["RECENCY"], df["T"])

pred = ggf.customer_lifetime_value(
    bgf,  #the model to use to predict the number of future transactions
    df["FREQUENCY"],
    df["RECENCY"],
    df["T"],
    df["MONETARY_VALUE"],
    time=1,  # year

コード例 #25

0

ファイルを表示

# Returning customers with a positive average spend: the only rows the
# Gamma-Gamma model is fitted on.
summary_ggf = summary.loc[(summary.frequency > 0)
                          & (summary.monetary_value > 0)]

summary_ggf.columns

# The Gamma-Gamma model relies on frequency and monetary value being
# (close to) uncorrelated; inspect the correlation and the distribution.
summary_ggf[['frequency', 'monetary_value']].corr()

summary_ggf.monetary_value.hist()

from lifetimes import GammaGammaFitter

ggf = GammaGammaFitter(penalizer_coef=0.0)
ggf.fit(summary_ggf['frequency'], summary_ggf['monetary_value'])

ggf.conditional_expected_average_profit(summary_ggf['frequency'],
                                        summary_ggf['monetary_value']).head(10)

# Refit the BG/NBD model on the same subset so that both models describe
# the same customers.  (The identical fit call used to be issued twice in a
# row; the redundant second fit has been removed.)
bgf.fit(summary_ggf['frequency'], summary_ggf['recency'], summary_ggf['T'])

ggf.customer_lifetime_value(
    bgf,  #the model to use to predict the number of future transactions
    summary_ggf['frequency'],
    summary_ggf['recency'],
    summary_ggf['T'],
    summary_ggf['monetary_value'],
    time=12,  # months
    discount_rate=0.01  # monthly discount rate ~ 12.7% annually
).head(10)

コード例 #26

0

ファイルを表示

ファイル: ltv_calc_v1.py プロジェクト: mozilla-it/vertical-ltv

def generate_clv_table(data, clv_prediction_time=None, model_penalizer=None):
    """Build a per-client table of historical, predicted and total CLV.

    The activity stream is reduced to sessions with at least one search,
    summarised per client, and fed to a BG/NBD model (activity forecast) and
    a Gamma-Gamma model (value per session).  The predicted value is then
    combined with the historical revenue.

    Args:
        data: activity-level DataFrame with the columns ``'client_id'``,
            ``'activity_date'``, ``'Searches'`` and ``'Revenue'``.  The
            maximum of ``'activity_date'`` must expose ``.date()``.
        clv_prediction_time: horizon, in months, passed to
            ``customer_lifetime_value``.  Defaults to 12 when ``None``.
        model_penalizer: ``penalizer_coef`` of the BG/NBD model.  Defaults
            to 0 when ``None``.

    Returns:
        DataFrame indexed by client id with the columns ``frequency``,
        ``recency``, ``customer_age``, ``avg_session_value``,
        ``predicted_searches_14_days``, ``alive_probability``,
        ``predicted_clv_12_months``, ``historical_searches``,
        ``historical_clv``, ``total_clv``, ``days_since_last_active``,
        ``user_status`` and ``calc_date``.
    """

    #set default values if they are not stated
    if clv_prediction_time is None:
        clv_prediction_time = 12
    if model_penalizer is None:
        model_penalizer = 0

    #Remove non search sessions
    data = data[data['Searches'] > 0]

    max_date = data['activity_date'].max()

    # Using "summary_data_from_transaction_data" function to aggregate the activity stream into the appropriate metrics
    # Model requires 'activity_date' column name.  For our purpose this is synonymous with submission_date.
    summary = summary_data_from_transaction_data(
        data,
        'client_id',
        'activity_date',
        'Revenue',
        observation_period_end=max_date)

    # Building the Model using BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=model_penalizer)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Conditional expected purchases
    # These are the expected purchases expected from each individual given the time specified

    # t = days in to future
    t = 14
    summary[
        'predicted_searches'] = bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T'])

    #Conditional Alive Probability
    summary['alive_prob'] = summary.apply(
        lambda row: calc_alive_prob(row, bgf), axis=1)
    summary['alive_prob'] = summary['alive_prob'].astype(float)

    # There cannot be non-positive values in the monetary_value or frequency vector
    summary_with_value_and_returns = summary[(summary['monetary_value'] > 0)
                                             & (summary['frequency'] > 0)]

    # The recency, frequency and T vectors are columns of the same frame, so
    # they are all zero-length exactly when the frame has no rows.
    has_returning_clients = len(summary_with_value_and_returns) > 0

    if not has_returning_clients:
        logger.debug(data['client_id'])

    # Setting up Gamma Gamma model
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary_with_value_and_returns['frequency'],
            summary_with_value_and_returns['monetary_value'])

    # Output average profit per transaction by client ID
    ggf_output = ggf.conditional_expected_average_profit(
        summary_with_value_and_returns['frequency'],
        summary_with_value_and_returns['monetary_value'])

    # Refitting the BG/NBD model with the same data if frequency, recency or T are not zero length vectors.
    # (The previous test, `not (<generator expression>)`, was always False
    # because a generator object is truthy, so the refit never ran.)
    if has_returning_clients:
        bgf.fit(summary_with_value_and_returns['frequency'],
                summary_with_value_and_returns['recency'],
                summary_with_value_and_returns['T'])

    # Getting Customer lifetime value using the Gamma Gamma output
    # NOTE: the time can be adjusted, but is currently set to 12 months

    customer_predicted_value = ggf.customer_lifetime_value(
        bgf,  #the model to use to predict the number of future transactions
        summary_with_value_and_returns['frequency'],
        summary_with_value_and_returns['recency'],
        summary_with_value_and_returns['T'],
        summary_with_value_and_returns['monetary_value'],
        time=clv_prediction_time,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    )

    # Converting to dataframe
    df_cpv = pd.DataFrame({
        'client_id': customer_predicted_value.index,
        'pred_values': customer_predicted_value.values
    })

    # Setting client_id as index
    df_cpv = df_cpv.set_index('client_id')

    # Merge with original summary
    df_merged = pd.merge(summary,
                         df_cpv,
                         left_index=True,
                         right_index=True,
                         how='outer')

    # Historical CLV
    # Columns are selected with a list: the bare-tuple form
    # ['Searches', 'Revenue'] is no longer accepted by current pandas.
    data_hist = data.groupby(
        ['client_id'])[['Searches',
                        'Revenue']].apply(lambda x: x.astype(float).sum())

    # Merge with original summary
    df_final = pd.merge(df_merged,
                        data_hist,
                        left_index=True,
                        right_index=True,
                        how='outer')

    # Prevent NaN on the pred_clv column
    # (.loc writes into df_final itself; chained assignment may write into a
    # temporary copy instead.)
    df_final.loc[df_final['frequency'] == 0, 'pred_values'] = 0.0

    # Create column that combines historical and predicted customer value
    df_final['total_clv'] = df_final['pred_values'] + df_final['Revenue']

    # Create column which calculates in days the number of days since they were last active
    df_final['last_active'] = df_final['T'] - df_final['recency']

    # Create a column which labels users inactive over 14 days as "Expired" ELSE "Active"
    df_final['user_status'] = np.where(df_final['last_active'] > 14, 'Expired',
                                       'Active')

    # Add column with date of calculation
    # Set calc_date to max submission date
    df_final['calc_date'] = max_date.date()  #pd.Timestamp('today').date()

    # Rename columns as appropriate (positional: relies on the column order
    # produced by the merges and assignments above)
    df_final.columns = [
        'frequency', 'recency', 'customer_age', 'avg_session_value',
        'predicted_searches_14_days', 'alive_probability',
        'predicted_clv_12_months', 'historical_searches', 'historical_clv',
        'total_clv', 'days_since_last_active', 'user_status', 'calc_date'
    ]

    #Prevent non returning customers from having 100% alive probability
    df_final.loc[df_final['frequency'] == 0, 'alive_probability'] = 0.0

    return df_final

コード例 #27

0

ファイルを表示

summary_with_money_value.head()
## Keep only returning customers (frequency > 0), i.e. drop customers
## without any repeat purchase
returning_customers_summary = summary_with_money_value[
    summary_with_money_value['frequency'] > 0]

############################### Average Profit Calculation ##########
# At this point we can train our Gamma-Gamma submodel and predict the
# conditional, expected average transaction value of our customers.
from lifetimes import GammaGammaFitter
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)

# We can now estimate the average transaction value:
AVG_Profit = ggf.conditional_expected_average_profit(
    returning_customers_summary['frequency'],
    returning_customers_summary['monetary_value'])
# Wrap the result in a pandas Series.
AVG_Profit = pd.Series(AVG_Profit)

############################### Customer Lifetime Value Calculation ##########
# Fit the BG model on the returning-customers subset (the same rows the
# Gamma-Gamma model was fitted on); it is the model used to predict the
# number of future transactions.
from lifetimes import BetaGeoFitter
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['recency'],
        returning_customers_summary['T'])

CLV_1Year = ggf.customer_lifetime_value(
    bgf,
    returning_customers_summary['frequency'],
    returning_customers_summary['recency'],