Example #1
def clv(pareto, mbg, summary):

    returning_customers_summary = summary[summary['frequency'] > 0]

    ggf = GammaGammaFitter(penalizer_coef=0.0)
    ggf.fit(frequency=returning_customers_summary['frequency'],
            monetary_value=returning_customers_summary['monetary_value'])

    pred_clv_pareto = ggf.customer_lifetime_value(
        transaction_prediction_model=pareto,
        frequency=summary['frequency'],
        recency=summary['recency'],
        T=summary['T'],
        monetary_value=summary['monetary_value'],
        time=12,
        freq="D")

    pred_clv_mbg = ggf.customer_lifetime_value(
        transaction_prediction_model=mbg,
        frequency=summary['frequency'],
        recency=summary['recency'],
        T=summary['T'],
        monetary_value=summary['monetary_value'],
        time=12,
        freq="D")

    return pred_clv_pareto, pred_clv_mbg
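# --- Usage sketch (not part of the original snippet) ------------------------
# clv() expects two already-fitted transaction models plus a lifetimes summary
# frame with 'frequency', 'recency', 'T' and 'monetary_value' columns built at
# daily frequency. A minimal, assumed driver might look like this:
def fit_transaction_models(summary):
    from lifetimes import ParetoNBDFitter, ModifiedBetaGeoFitter

    pareto = ParetoNBDFitter(penalizer_coef=0.0)
    pareto.fit(summary['frequency'], summary['recency'], summary['T'])

    mbg = ModifiedBetaGeoFitter(penalizer_coef=0.0)
    mbg.fit(summary['frequency'], summary['recency'], summary['T'])
    return pareto, mbg

# pareto_model, mbg_model = fit_transaction_models(summary)
# pred_clv_pareto, pred_clv_mbg = clv(pareto_model, mbg_model, summary)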
Example #2
def readGammaGammaFitterModel():

    gammaGammaFitterModel = GammaGammaFitter()

    gammaGammaFitterModel.load_model("GammaGammaFitterModel.pkl")

    return gammaGammaFitterModel
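# --- Companion sketch (not part of the original snippet) --------------------
# readGammaGammaFitterModel() above loads a pickled fitter; a matching save
# helper, using the save_model() method that lifetimes fitters provide and the
# same assumed file name, could look like:
def saveGammaGammaFitterModel(gammaGammaFitterModel):
    gammaGammaFitterModel.save_model("GammaGammaFitterModel.pkl")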
Example #3
def create_cltv_pred(dataframe, w=4, m=1):
    """
    Fit BG/NBD and Gamma-Gamma models and produce a CLTV prediction.

    Parameters
    ----------
    dataframe : pandas.DataFrame
    w : int, number of weeks for the BG/NBD sales forecast
    m : int, number of months for the CLTV prediction

    Returns
    -------
    pandas.DataFrame
    """
    # BGNBD

    dataframe = dataframe[dataframe["monetary_avg"] > 0]
    dataframe["frequency"] = dataframe["frequency"].astype(int)

    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(dataframe['frequency'], dataframe['recency_weekly'],
            dataframe['T_weekly'])

    dataframe[f'exp_sales_{w}_week'] = bgf.predict(w, dataframe['frequency'],
                                                   dataframe['recency_weekly'],
                                                   dataframe['T_weekly'])

    # Gamma-Gamma - expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(dataframe['frequency'], dataframe['monetary_avg'])
    dataframe[
        "expected_average_profit"] = ggf.conditional_expected_average_profit(
            dataframe['frequency'], dataframe['monetary_avg'])

    # CLTV Prediction
    cltv = ggf.customer_lifetime_value(bgf,
                                       dataframe['frequency'],
                                       dataframe['recency_weekly'],
                                       dataframe['T_weekly'],
                                       dataframe['monetary_avg'],
                                       time=m,
                                       freq="W",
                                       discount_rate=0.01)

    dataframe[f'cltv_p_{m}_month'] = cltv

    scaler = MinMaxScaler(feature_range=(1, 100))
    dataframe['cltv_p_score'] = scaler.fit_transform(
        dataframe[[f'cltv_p_{m}_month']])

    # cltv_p Segment
    dataframe['cltv_p_segment'] = pd.qcut(dataframe['cltv_p_score'],
                                          3,
                                          labels=['C', 'B', 'A'])

    new_col = dataframe.columns[~dataframe.columns.
                                isin(['recency', 'frequency', 'monetary'])]
    dataframe = dataframe[new_col]

    return dataframe
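# --- Usage sketch (not part of the original snippet) ------------------------
# create_cltv_pred() reads the columns 'frequency', 'monetary_avg',
# 'recency_weekly' and 'T_weekly' from the dataframe it receives; a
# hypothetical call producing a 4-week sales forecast and a 3-month CLTV:
#
# cltv_df = create_cltv_pred(customer_summary_df, w=4, m=3)
# cltv_df.sort_values('cltv_p_score', ascending=False).head()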
Example #4
def run_btyd(model_type, data_src, threshold_date, predict_end):
  """Run selected BTYD model on data files located in args.data_src.

  Args:
    model_type:                 model type (PARETO, BGNBD)
    data_src:                   path to data
    threshold_date:             end date for training data 'YYYY-mm-dd'
    predict_end:                end date for predictions 'YYYY-mm-dd'
  """
  train_end_date = datetime.strptime(threshold_date, '%Y-%m-%d')
  predict_end_date = datetime.strptime(predict_end, '%Y-%m-%d')

  # load training transaction data
  summary, actual_df = load_data(data_src)

  # train fitter for selected model
  tf.logging.info('Fitting model...')

  if model_type == PARETO:
    fitter = paretonbd_model(summary)
  elif model_type == BGNBD:
    fitter = bgnbd_model(summary)
  else:
    raise ValueError('Unknown model type: %s' % model_type)

  tf.logging.info('Done.')

  #
  # use trained fitter to compute actual vs predicted ltv for each user
  #

  # compute the number of days in the prediction period
  time_days = (predict_end_date - train_end_date).days
  time_months = int(math.ceil(time_days / 30.0))

  # fit gamma-gamma model
  tf.logging.info('Fitting GammaGamma model...')

  ggf = GammaGammaFitter(penalizer_coef=0)
  ggf.fit(summary['frequency'], summary['monetary_value'])

  tf.logging.info('Done.')

  ltv, rmse = predict_value(summary,
                            actual_df,
                            fitter,
                            ggf,
                            time_days,
                            time_months)

  # output results to csv
  output_file = os.path.join(data_src, OUTPUT_FILE)
  ltv.to_csv(output_file, index=False)

  # log results
  tf.logging.info('BTYD RMSE error for %s model: %.2f', model_type, rmse)
  print('RMSE prediction error: %.2f' % rmse)
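# --- Sketch of the helpers referenced above (not part of the original) ------
# run_btyd() calls paretonbd_model() and bgnbd_model(), which are not shown in
# this snippet. Minimal versions, assuming `summary` carries the standard
# lifetimes columns, might look like:
def paretonbd_model(summary):
  from lifetimes import ParetoNBDFitter
  fitter = ParetoNBDFitter(penalizer_coef=0.0)
  fitter.fit(summary['frequency'], summary['recency'], summary['T'])
  return fitter


def bgnbd_model(summary):
  from lifetimes import BetaGeoFitter
  fitter = BetaGeoFitter(penalizer_coef=0.0)
  fitter.fit(summary['frequency'], summary['recency'], summary['T'])
  return fitter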
Example #5
def predictSpending(customerId):
    # initialize the data dictionary that will be returned
    data = {"success": False, "result": {"customerId": "", "y": 0.0}}

    # ensure the customer ID was properly uploaded to our endpoint
    if customerId:
        print("* get data")
        data = pandas.read_csv("sample_transactions.csv")
        #data = pandas.read_json(baseURL + "/api/transactions")
        #data = data.drop(columns="_id")

        print("* prepare data")
        # prepare and shaping the data
        # columns -
        #   customerId
        # 	frequency : number of repeat purchase transactions
        #	recency: time (in days) between first purchase and latest purchase
        #	T: time (in days) between first purchase and end of the period under study
        #	monetary_value: average transactions amount
        today = pandas.to_datetime(datetime.date.today())
        summaryData = summary_data_from_transaction_data(
            data,
            "customerId",
            "transactionDate",
            monetary_value_col="transactionAmount",
            observation_period_end=today)
        # keep only customers with at least one repeat transaction
        analysisData = summaryData[summaryData["frequency"] > 0]

        # get the stat of the particular customer
        customer = analysisData.loc[customerId]

        # load model
        ggf_loaded = GammaGammaFitter()
        ggf_loaded.load_model('ggf.pkl')

        # estimate the average transaction amount
        predict = ggf_loaded.conditional_expected_average_profit(
            customer["frequency"], customer['monetary_value'])

        # add the input and predicted output to the return data
        data = {
            "success": True,
            "result": {
                "customerId": customerId,
                # cast to a plain float so flask.jsonify can serialize it
                "y": float(predict)
            }
        }

    # return the data dictionary as a JSON response
    return flask.jsonify(data)
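# --- Wiring sketch (not part of the original snippet) -----------------------
# predictSpending() returns a Flask JSON response, so it is presumably used as
# a view function; a hypothetical route registration (the app object and route
# path are assumptions):
#
# app = flask.Flask(__name__)
#
# @app.route("/predict/<customerId>", methods=["GET"])
# def predict_endpoint(customerId):
#     return predictSpending(customerId)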
Example #6
def trainGammaGammaModel():

    summaryDataFromTransactionDataForCLV = readsummaryDataFromTransactionDataForCLV(
    )

    # get the customers who have made at least one repeat transaction with the company
    shortlistedCustomers = summaryDataFromTransactionDataForCLV[
        summaryDataFromTransactionDataForCLV["frequency"] > 0]

    gammaGammaFitterModel = GammaGammaFitter(penalizer_coef=0.0)

    gammaGammaFitterModel.fit(shortlistedCustomers["frequency"],
                              shortlistedCustomers["monetary_value"])

    saveGammaGammaFitterModel(gammaGammaFitterModel)
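# --- Sketch of a helper referenced above (not part of the original) ---------
# readsummaryDataFromTransactionDataForCLV() is not shown in this snippet (the
# save helper is sketched under Example #2). A minimal loader, assuming the
# transaction CSV and column names used in Example #5, might look like:
def readsummaryDataFromTransactionDataForCLV():
    import datetime

    import pandas
    from lifetimes.utils import summary_data_from_transaction_data

    transactions = pandas.read_csv("sample_transactions.csv")
    return summary_data_from_transaction_data(
        transactions,
        "customerId",
        "transactionDate",
        monetary_value_col="transactionAmount",
        observation_period_end=pandas.to_datetime(datetime.date.today()))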
Example #7
def estimate_clv_model(summary, model_penalizer=None):
  #set default values if they are not stated
  if model_penalizer is None:
    model_penalizer = 0

  # Building the Model using BG/NBD
  bgf = BetaGeoFitter(penalizer_coef=model_penalizer)
  bgf.fit(summary['frequency'], summary['recency'], summary['T'])

  # There cannot be non-positive values in the monetary_value or frequency vector
  summary_with_value_and_returns = summary[(summary['monetary_value']>0) & (summary['frequency']>0)]
  # Setting up Gamma Gamma model
  ggf = GammaGammaFitter(penalizer_coef = 0)
  ggf.fit(summary_with_value_and_returns['frequency'], summary_with_value_and_returns['monetary_value']) 

  # Refit the BG/NBD model on the filtered data if none of frequency, recency or T is a zero-length vector
  if not any(len(x) == 0 for x in [summary_with_value_and_returns['recency'], summary_with_value_and_returns['frequency'], summary_with_value_and_returns['T']]):
    bgf.fit(summary_with_value_and_returns['frequency'],summary_with_value_and_returns['recency'],summary_with_value_and_returns['T'])

  return [bgf, ggf]
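# --- Usage sketch (not part of the original snippet) ------------------------
# estimate_clv_model() returns the fitted [bgf, ggf] pair; customer lifetime
# value can then be computed with the Gamma-Gamma model, e.g. over a
# hypothetical 12-month horizon:
#
# bgf, ggf = estimate_clv_model(summary)
# clv_12m = ggf.customer_lifetime_value(
#     bgf,
#     summary['frequency'], summary['recency'], summary['T'],
#     summary['monetary_value'],
#     time=12, discount_rate=0.01)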
Example #8
def gg_model(rfmmod, bgf, p, f):
    # Build the Model
    # keep returning customers with positive spend; copy to avoid writing to a slice of rfmmod
    ret_cust = rfmmod[(rfmmod['frequency'] > 0)
                      & (rfmmod['monetary_value'] > 0)].copy()
    ggf = GammaGammaFitter(penalizer_coef=p)
    ggf.fit(ret_cust['frequency'], ret_cust['monetary_value'])
    pred_clt = ggf.customer_lifetime_value(
        bgf,
        ret_cust['frequency'],
        ret_cust['recency'],
        ret_cust['T'],
        ret_cust['monetary_value'],
        time=12,  # months
        freq=f,
        discount_rate=0.01)
    ret_cust['predicted_cltv'] = pred_clt
    ret_cust['exp_profit'] = ggf.conditional_expected_average_profit(
        ret_cust['frequency'], ret_cust['monetary_value'])
    ret_cust = ret_cust.sort_values('predicted_cltv', ascending=False).round(3)
    return ret_cust
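# --- Usage sketch (not part of the original snippet) ------------------------
# gg_model() expects an RFM frame with 'frequency', 'recency', 'T' and
# 'monetary_value' columns plus an already-fitted BG/NBD model; a hypothetical
# call with a 0.001 penalizer on daily-frequency data:
#
# top_customers = gg_model(rfm_summary, bgf, p=0.001, f='D')
# top_customers[['predicted_cltv', 'exp_profit']].head()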
Example #9
def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)
    # build the RFM metrics + tenure
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max()-date.min()).days,
                                                                lambda date: (today_date-date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})
    rfm.columns = rfm.columns.droplevel(0)
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # simplified monetary_avg
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # calculate weekly recency and tenure for BG/NBD
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # checks
    rfm = rfm[rfm["monetary_avg"] > 0]
    rfm = rfm[(rfm['frequency'] > 1)]
    rfm["frequency"] = rfm["frequency"].astype(int)

    # bgnbd
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly_cltv_p'],
            rfm['T_weekly'])

    # exp_sales_1_month
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # exp_sales_3_month
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # expected_avg_profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])
    # 6-month cltv_p
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    rfm["cltv_p"] = cltv

    # minmaxscaler
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    ## recency_cltv_p, recency_weekly_cltv_p
    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm
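# --- Usage sketch (not part of the original snippet) ------------------------
# create_cltv_p() expects the raw retail dataframe with 'Customer ID',
# 'Invoice', 'InvoiceDate' and 'TotalPrice' columns (TotalPrice already
# computed per line). A hypothetical call and segment summary:
#
# cltv_df = create_cltv_p(df)
# cltv_df.groupby("cltv_p_segment").agg({"cltv_p": ["mean", "count"]})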
Example #10
def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)

    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                        lambda date: (today_date - date.min()).days],
        'Invoice': lambda num: num.nunique(),
        'TotalPrice': lambda price: price.sum()
    })
    rfm.columns = rfm.columns.droplevel(0)

    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']
    rfm['monetary'] = rfm['monetary'] / rfm['frequency']

    rfm.rename(columns={'monetary': 'monetary_avg'}, inplace=True)

    rfm["recency_weekly_cltv_p"] = rfm['recency_cltv_p'] / 7
    rfm['T_weekly'] = rfm['T'] / 7

    rfm = rfm[rfm['monetary_avg'] > 0]
    rfm = rfm[(rfm['frequency'] > 1)]
    rfm['frequency'] = rfm['frequency'].astype(int)

    #BGNBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    rfm["exp_sales_1_month"] = bgf.predict(4, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    rfm["exp_sales_3_month"] = bgf.predict(12, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    #Gamma Gamma
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])

    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq='W',
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]

    return rfm
Example #11
def get_clv(oracle_conn_id, src_client_id, storage_bucket, ds, **context):
    import matplotlib.pyplot
    matplotlib.pyplot.ioff()
    ##
    from lifetimes.utils import calibration_and_holdout_data
    from lifetimes.plotting import plot_frequency_recency_matrix
    from lifetimes.plotting import plot_probability_alive_matrix
    from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases
    from lifetimes.plotting import plot_period_transactions
    from lifetimes.plotting import plot_history_alive
    from lifetimes.plotting import plot_cumulative_transactions
    from lifetimes.utils import expected_cumulative_transactions
    from lifetimes.utils import summary_data_from_transaction_data
    from lifetimes import BetaGeoFitter
    from lifetimes import GammaGammaFitter
    import datetime
    import pandas as pd
    import datalab.storage as gcs
    conn = OracleHook(oracle_conn_id=oracle_conn_id).get_conn()
    print(src_client_id, context)
    query = context['templates_dict']['query']
    data = pd.read_sql(query, con=conn)
    data.columns = data.columns.str.lower()
    print(data.head())

    # Calculate RFM values#
    calibration_end_date = datetime.datetime(2018, 5, 24)
    training_rfm = calibration_and_holdout_data(
        transactions=data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        calibration_period_end=calibration_end_date,
        freq='D',
        monetary_value_col='price_total')
    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(training_rfm['frequency_cal'], training_rfm['recency_cal'],
            training_rfm['T_cal'])
    print(bgf)

    # Matrix charts
    plot_period_transactions_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_period_transactions_chart.svg'
    plot_frequency_recency_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_frequency_recency_matrix.svg'
    plot_probability_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_probability_alive_matrix.svg'
    plot_calibration_vs_holdout_chart = context.get("ds_nodash") + str(
        src_client_id) + '_plot_calibration_vs_holdout_purchases.svg'

    ax0 = plot_period_transactions(bgf, max_frequency=30)
    ax0.figure.savefig(plot_period_transactions_chart, format='svg')
    ax1 = plot_frequency_recency_matrix(bgf)
    ax1.figure.savefig(plot_frequency_recency_chart, format='svg')
    ax2 = plot_probability_alive_matrix(bgf)
    ax2.figure.savefig(plot_probability_chart, format='svg')
    ax3 = plot_calibration_purchases_vs_holdout_purchases(bgf,
                                                          training_rfm,
                                                          n=50)
    ax3.figure.savefig(plot_calibration_vs_holdout_chart, format='svg')
    full_rfm = summary_data_from_transaction_data(
        data,
        customer_id_col='src_user_id',
        datetime_col='pickup_date',
        monetary_value_col='price_total',
        datetime_format=None,
        observation_period_end=None,
        freq='D')
    returning_full_rfm = full_rfm[full_rfm['frequency'] > 0]
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(returning_full_rfm['frequency'],
            returning_full_rfm['monetary_value'])

    customer_lifetime = 30  # expected number of months lifetime of a customer
    clv = ggf.customer_lifetime_value(
        bgf,  #the model to use to predict the number of future transactions
        full_rfm['frequency'],
        full_rfm['recency'],
        full_rfm['T'],
        full_rfm['monetary_value'],
        time=customer_lifetime,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    ).sort_values(ascending=False)
    full_rfm_with_value = full_rfm.join(clv)

    full_rfm_file = context.get("ds_nodash") + "-src_client_id-" + str(
        src_client_id) + '-icabbi-test.csv'
    full_rfm_with_value.to_csv(full_rfm_file)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            full_rfm_file,
            filename=full_rfm_file)
    # upload the chart files alongside the CSV
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_period_transactions_chart,
            filename=plot_period_transactions_chart)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_frequency_recency_chart,
            filename=plot_frequency_recency_chart)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_probability_chart,
            filename=plot_probability_chart)
    GoogleCloudStorageHook(
        google_cloud_storage_conn_id='google_conn_default').upload(
            bucket=storage_bucket,
            object=str(src_client_id) + "/" + context.get("ds_nodash") + "/" +
            plot_calibration_vs_holdout_chart,
            filename=plot_calibration_vs_holdout_chart)
Example #12
def fit_ggf(self):
    self.ggf = GammaGammaFitter(penalizer_coef=0)
    self.ggf.fit(self.return_customers['frequency'], self.return_customers['monetary_value'])
Example #13
########################################
# BGNBD
########################################

# If you don't have the lifetimes library, install it with: pip install lifetimes

bgf = BetaGeoFitter(penalizer_coef=0.001)  # BGNBD created.
bgf.fit(rfm["frequency"], rfm["recency_weekly_p"],
        rfm["T_weekly"])  # BGNBD fitted.

########################################
# Gamma Gamma
########################################

ggf = GammaGammaFitter(penalizer_coef=0.01)  # Gamma Gamma created.
ggf.fit(rfm["frequency"], rfm["monetary_avg"])  # Gamma gamma fitted.

# 6 Months CLTV Prediction

cltv_6_months = ggf.customer_lifetime_value(bgf,
                                            rfm['frequency'],
                                            rfm['recency_weekly_p'],
                                            rfm['T_weekly'],
                                            rfm['monetary_avg'],
                                            time=6,
                                            freq="W",
                                            discount_rate=0.01)

cltv_6_months = cltv_6_months.reset_index()  # move the customer ID index back into a regular column
Example #14
# Plot the 1-month and 3-month expected sales

rfm["exp_P_1"] = bgf.predict(4,rfm['frequency'],rfm['recency_weekly_p'],rfm['T_weekly'])
rfm["exp_P_3"] = bgf.predict(4*3,rfm['frequency'],rfm['recency_weekly_p'],rfm['T_weekly'])

fig, ax = plt.subplots()
ax.plot(rfm.index,rfm["exp_P_1"])
ax.plot(rfm.index,rfm["exp_P_3"])
plt.show()


##############################################################
# 3. Building the GAMMA-GAMMA Model
##############################################################
rfm.head()
ggf = GammaGammaFitter(penalizer_coef=0.01)  # create the Gamma-Gamma model object
ggf.fit(rfm['frequency'], rfm['monetary_avg'])  # fit the Gamma-Gamma model on the frequency and monetary_avg variables

# Calculate the expected average profit with the Gamma-Gamma model. Note: this is not a time-based
# prediction; the expected average profit per transaction does not depend on the time horizon
# (a time-dependent figure is sketched at the end of this example).
ggf.conditional_expected_average_profit(rfm['frequency'],
                                           rfm['monetary_avg']).head(10)

ggf.conditional_expected_average_profit(rfm['frequency'],
                                        rfm['monetary_avg']).sort_values(ascending=False).head(10)

rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                         rfm['monetary_avg'])

rfm.sort_values("expected_average_profit", ascending=False).head(20)

rfm.shape
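# --- Sketch (not part of the original snippet): adding the time dimension ---
# conditional_expected_average_profit() is a per-transaction estimate with no
# time component; a time-dependent figure comes from combining it with the
# BG/NBD purchase forecast via customer_lifetime_value(). For example, over an
# assumed 3-month horizon on the same rfm frame:
cltv_3_months = ggf.customer_lifetime_value(bgf,
                                            rfm['frequency'],
                                            rfm['recency_weekly_p'],
                                            rfm['T_weekly'],
                                            rfm['monetary_avg'],
                                            time=3,
                                            freq="W",
                                            discount_rate=0.01)
cltv_3_months.sort_values(ascending=False).head(10)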
Example #15
bgf.fit(summary_cal_holdout['frequency_cal'],
        summary_cal_holdout['recency_cal'], summary_cal_holdout['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, summary_cal_holdout)
# Visualization

plot_frequency_recency_matrix(bgf)
plot_probability_alive_matrix(bgf)
plt.show()

### Gamma-Gamma model###
returning_customers_summary = data[data['frequency'] > 0]
returning_customers_summary[[
    'monetary_value', 'frequency'
]].corr()  # Correlation between monetary value and the purchase frequency.

ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)

# estimate the average transaction value
print(
    ggf.conditional_expected_average_profit(data['frequency'],
                                            data['monetary_value']).head(10))

# refit the BG model to the summary_with_money_value dataset
bgf.fit(data['frequency'], data['recency'], data['T'])

CLV_12M = ggf.customer_lifetime_value(
    bgf,  # the model to use to predict the number of future transactions
    data['frequency'],
    data['recency'],
    data['T'],
    data['monetary_value'],
    time=12,  # months, matching the CLV_12M name
    discount_rate=0.01)  # the snippet is truncated here; the remaining arguments are assumed
Example #16
def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)

    ## recency is user-specific (dynamic)
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                                                                lambda date: (today_date - date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})

    rfm.columns = rfm.columns.droplevel(0)

    ## recency_cltv_p
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    ## simplified monetary_avg
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]

    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # CALCULATE WEEKLY RECENCY AND WEEKLY T FOR BG/NBD
    ## recency_weekly_cltv_p
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # CHECK
    rfm = rfm[rfm["monetary_avg"] > 0]

    ## frequency filter (for a more reliable cltv_p calculation)
    rfm = rfm[(rfm['frequency'] > 1)]

    rfm["frequency"] = rfm["frequency"].astype(int)

    # BGNBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly_cltv_p'],
            rfm['T_weekly'])

    # exp_sales_1_month
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # exp_sales_3_month
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])
    # 6 months cltv_p
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # minmaxscaler
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    ## recency_cltv_p, recency_weekly_cltv_p
    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm
Example #17
def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)

    # recency user-specific
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,       # "recency_cltv_p"
                                                                lambda date: (today_date - date.min()).days],      # "T"
                                                'Invoice': lambda num: num.nunique(),                              # "frequency"
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})                # "monetary"
    rfm.columns = rfm.columns.droplevel(0)

    # recency_cltv_p
    rfm.columns = ["recency_cltv_p", "T", "frequency", "monetary"]

    # Simplified monetary_avg (the Gamma-Gamma model works on the average transaction value)
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]
    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # Calculating WEEKLY RECENCY AND WEEKLY T for the BG/NBD MODEL
    # recency_weekly_cltv_p
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # CHECK IT OUT! Monetary avg must be positive
    rfm = rfm[rfm["monetary_avg"] > 0]

    # frequency filter
    rfm = rfm[(rfm["frequency"] > 1)]
    rfm["frequency"] = rfm["frequency"].astype(int)  # converting it to integer just in case!

    # Establishing the BGNBD Model
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm["frequency"],
            rfm["recency_weekly_cltv_p"],
            rfm["T_weekly"])

    # exp_sales_1_month
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])
    # exp_sales_3_month
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm["frequency"],
                                           rfm["recency_weekly_cltv_p"],
                                           rfm["T_weekly"])

    # Establishing the Gamma-Gamma model, which estimates the expected average profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm["frequency"], rfm["monetary_avg"])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm["frequency"],
                                                                             rfm["monetary_avg"])
    # CLTV Pred for 6 months
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm["frequency"],
                                       rfm["recency_weekly_cltv_p"],
                                       rfm["T_weekly"],
                                       rfm["monetary_avg"],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # Minmaxscaler
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # rfm.fillna(0, inplace=True)

    # cltv_p_segment
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # recency_cltv_p, recency_weekly_cltv_p
    rfm = rfm[["recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm
Example #18
customer_detail['p_alive'] = mbgnbd.conditional_probability_alive(
    customer_detail['frequency'], customer_detail['recency'],
    customer_detail['T'])
customer_detail.head()

# In[14]:

#The Gamma-Gamma model assumes that there is no relationship between the monetary value and the purchase frequency
customer_detail[['avg_order_value', 'frequency']].corr()

# In[15]:

#It is used to estimate the average monetary value of customer transactions
from lifetimes import GammaGammaFitter

gg = GammaGammaFitter(penalizer_coef=0.001)
gg.fit(customer_detail['frequency'],
       customer_detail['avg_order_value'],
       verbose=True)

print(
    gg.conditional_expected_average_profit(
        customer_detail['frequency'],
        customer_detail['avg_order_value']).head(10))

# In[16]:

customer_detail['clv'] = gg.customer_lifetime_value(
    mbgnbd,
    customer_detail['frequency'],
    customer_detail['recency'],
    customer_detail['T'],
    customer_detail['avg_order_value'],
    time=12,  # months (assumed; the snippet is truncated here)
    discount_rate=0.01)  # remaining arguments assumed from the usual call pattern
Example #19
def get_spend_model(self):
    return GammaGammaFitter(penalizer_coef=self.penalizer_coef)
Example #20
def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)

    # recency is user-specific (dynamic)
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days, lambda date:
            (today_date - date.min()).days
        ],
        'Invoice':
        lambda num: num.nunique(),
        'TotalPrice':
        lambda TotalPrice: TotalPrice.sum()
    })

    rfm.columns = rfm.columns.droplevel(0)

    # recency_cltv_p
    rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

    # simplified monetary_avg
    rfm["monetary"] = rfm["monetary"] / rfm["frequency"]

    rfm.rename(columns={"monetary": "monetary_avg"}, inplace=True)

    # CALCULATE WEEKLY RECENCY AND WEEKLY T FOR BG/NBD
    # recency_weekly_cltv_p
    rfm["recency_weekly_cltv_p"] = rfm["recency_cltv_p"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # CHECK
    rfm = rfm[rfm["monetary_avg"] > 0]

    # frequency filter (for a more reliable cltv_p calculation)
    rfm = rfm[(rfm['frequency'] > 1)]

    rfm["frequency"] = rfm["frequency"].astype(int)

    # BGNBD
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(rfm['frequency'], rfm['recency_weekly_cltv_p'], rfm['T_weekly'])

    # exp_sales_1_month
    rfm["exp_sales_1_month"] = bgf.predict(4, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])
    # exp_sales_3_month
    rfm["exp_sales_3_month"] = bgf.predict(12, rfm['frequency'],
                                           rfm['recency_weekly_cltv_p'],
                                           rfm['T_weekly'])

    # expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(
        rfm['frequency'], rfm['monetary_avg'])
    # 6-month cltv_p
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly_cltv_p'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # minmaxscaler
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # rfm.fillna(0, inplace=True)

    # cltv_p_segment
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    # recency_cltv_p, recency_weekly_cltv_p
    rfm = rfm[[
        "recency_cltv_p", "T", "monetary_avg", "recency_weekly_cltv_p",
        "T_weekly", "exp_sales_1_month", "exp_sales_3_month",
        "expected_average_profit", "cltv_p", "cltv_p_segment"
    ]]

    return rfm
Example #21
File: AI.py  Project: 0424048/AICRM
def predicted_purchase_time(account, timesteap):
    # df = pd.read_csv('AIexcel/' + account + '.csv' , sep=',', names=['name','uuid','invoiceDate','produce_name','Total'],encoding='utf8',low_memory=False)
    df = pd.read_csv(
        'AIexcel/' + account + '.csv',
        names=['name', 'uuid', 'invoiceDate', 'produce_name', 'Total'],
        sep=',',
        encoding='utf8',
        low_memory=False)
    #df.rename(columns={u'收件人姓名':u'name', u'收件人手機':u'uuid', u'付款日期':u'invoiceDate', u'商品名稱':u'produce_name', u'商品總價':u'Total'}, inplace=True)
    df_ga = pd.read_csv('AIexcel/' + account + '_ga.csv',
                        names=['uuid', 'level', 'next_time'],
                        sep=',',
                        encoding='utf8',
                        low_memory=False)
    df_UserLabel = df_ga['level'][1:].tolist()
    df_ga.drop([0], inplace=True)
    if 'level' in df_ga:
        df_ga['level'] = df_ga.apply(ga_toLevel, axis=1)

    df = df.loc[df.invoiceDate.str.len() == 19]
    df = df.loc[df.name.str.len() <= 10]
    # take three columns
    df1 = df[['uuid', 'invoiceDate', 'Total']]
    # drop rows where invoiceDate == 1
    df1_ = df1.drop(df1[df1['invoiceDate'] == 1].index)
    # drop rows with missing values
    df_drop = df1_.dropna()
    # work with the cleaned dataframe
    dataframe = df_drop
    dataframe['invoiceDate'] = pd.to_datetime(dataframe['invoiceDate']).dt.date
    dataframe.Total = dataframe.Total.astype(float)
    data = summary_data_from_transaction_data(
        dataframe,
        'uuid',
        'invoiceDate',
        observation_period_end=dataframe.invoiceDate.max())
    data2 = summary_data_from_transaction_data(
        dataframe,
        'uuid',
        'invoiceDate',
        monetary_value_col='Total',
        observation_period_end=dataframe.invoiceDate.max())

    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(data['frequency'], data['recency'], data['T'])
    purchase_time = data
    purchase_time[
        'predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
            30, data['frequency'], data['recency'], data['T'])
    predicted_purchases_df = purchase_time[[
        'predicted_purchases'
    ]].sort_values(by='predicted_purchases', ascending=False)
    predicted_purchases_df['cycle'] = data['recency'] / data['frequency']
    returning_customers_summary = data2[(data2['frequency'] > 0)
                                        & (data2['monetary_value'] != 0)]
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(returning_customers_summary['frequency'],
            returning_customers_summary['monetary_value'])
    income = ggf.conditional_expected_average_profit(
        returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value']).to_frame()
    income.columns = ['predicted_price']
    predicted_purchases_df = predicted_purchases_df.merge(income,
                                                          on=['uuid'],
                                                          how='left')
    predicted_purchases_df.reset_index(inplace=True)

    mask = predicted_purchases_df.predicted_purchases > 1
    predicted_purchases_df.loc[mask, 'predicted_purchases'] = 1
    predicted_purchases_df['predicted_purchases'] = predicted_purchases_df[
        'predicted_purchases'].astype(float)
    predicted_purchases_df = predicted_purchases_df.sort_values(
        by=['predicted_purchases'], ascending=False)
    predicted_purchases_df['predicted_purchases'] = predicted_purchases_df[
        'predicted_purchases'].apply(lambda x: format(x, '.2%'))

    predicted_purchases_df = predicted_purchases_df.merge(df_ga,
                                                          left_on="uuid",
                                                          right_on="uuid",
                                                          how='left')

    predicted_purchases_df['level'] = predicted_purchases_df.apply(flag_df,
                                                                   axis=1)
    #predicted_purchases_df['level'] = predicted_purchases_df['level'].fillna(1)
    predicted_purchases_df.replace(np.nan, 0, inplace=True)
    predicted_purchases_df.replace(np.inf, 0, inplace=True)
    if 'next_time' not in predicted_purchases_df.columns:
        predicted_purchases_df['next_time'] = np.nan
    predicted_purchases_df['next_time'] = pd.to_datetime(
        predicted_purchases_df['next_time'])

    predicted_purchases_df_N = predicted_purchases_df[~(
        predicted_purchases_df.uuid.isin(
            ((predicted_purchases_df[predicted_purchases_df.next_time >= today]
              .uuid).astype(str)).tolist()))]
    predicted_purchases_df_off = predicted_purchases_df[(
        predicted_purchases_df.uuid.isin(
            ((predicted_purchases_df[predicted_purchases_df.next_time >= today]
              .uuid).astype(str)).tolist()))]
    new_df = pd.concat([predicted_purchases_df_N, predicted_purchases_df_off],
                       ignore_index=True)
    predicted_purchases_df_N['cycle'] = (
        predicted_purchases_df_N['cycle'] *
        predicted_purchases_df_N['level']).round(0).astype(int)
    predicted_purchases_df_N[
        'next_time'] = today + predicted_purchases_df_N.apply(time_df, axis=1)
    predicted_purchases_df_NQ = predicted_purchases_df_N.dropna()
    predicted_purchases_df_off = predicted_purchases_df_off.drop(
        columns=['predicted_purchases', 'cycle', 'predicted_price'])
    predicted_purchases_df_NQ = predicted_purchases_df_NQ.drop(
        columns=['predicted_purchases', 'cycle', 'predicted_price'])
    df_ga = df_ga.merge(predicted_purchases_df_off,
                        left_on="uuid",
                        right_on="uuid",
                        how='left')
    df_ga = df_ga.merge(predicted_purchases_df_NQ,
                        left_on="uuid",
                        right_on="uuid",
                        how='left')
    notNull_df = df_ga[
        df_ga['level'].notnull() & df_ga['next_time'].notnull()].drop(
            columns=['level_y', 'next_time_y', 'next_time_x', 'level_x'])
    notNull_df2 = df_ga[
        df_ga['level_y'].notnull() & df_ga['next_time_y'].notnull()].drop(
            columns=['level', 'next_time', 'next_time_x', 'level_x'])
    notNull_df2.columns = ['uuid', 'level', 'next_time']
    res = pd.concat([notNull_df, notNull_df2], axis=0, ignore_index=True)
    res.rename(columns={u'uuid': u'收件人手機'}, inplace=True)
    res['UserLabel'] = pd.Series(df_UserLabel)
    res = res[[u'收件人手機', u'UserLabel', u'next_time']]
    # res.to_csv('AIexcel/' + account + '_ga.csv',index=False,encoding='utf8')
    predicted_purchases_df_N = predicted_purchases_df_N.drop(
        columns=['level', 'cycle', 'next_time'])
    predicted_purchases_df_N.columns = [u'收件人手機', u'顧客購買機率', u'平均交易金額']

    return predicted_purchases_df_N


# print(predicted_purchase_time(account,30)[:30])
Example #22
def generate_clv_table(data, clv_prediction_time=None, model_penalizer=None):

    #set default values if they are not stated
    if clv_prediction_time is None:
        clv_prediction_time = 12
    if model_penalizer is None:
        model_penalizer = 0

    # Reformat csv as a Pandas dataframe
    #data = pd.read_csv(csv_file)

    #Remove non search sessions
    data = data[data['Searches'] > 0]

    max_date = data['activity_date'].max()

    # Using "summary_data_from_transaction_data" function to agregate the activity stream into the appropriate metrics
    # Model requires 'activity_date' column name.  For our purpose this is synonymous with submission_date.
    summary = summary_data_from_transaction_data(
        data,
        'client_id',
        'activity_date',
        'Revenue',
        observation_period_end=max_date)

    # Building the Model using BG/NBD
    bgf = BetaGeoFitter(penalizer_coef=model_penalizer)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Conditional expected purchases
    # These are the purchases expected from each individual over the time period specified

    # t = days in to future
    t = 14
    summary[
        'predicted_searches'] = bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T'])

    #Conditional Alive Probability
    summary['alive_prob'] = summary.apply(
        lambda row: calc_alive_prob(row, bgf), axis=1)
    summary['alive_prob'] = summary['alive_prob'].astype(float)
    #print summary['alive_prob']

    # There cannot be non-positive values in the monetary_value or frequency vector
    summary_with_value_and_returns = summary[(summary['monetary_value'] > 0)
                                             & (summary['frequency'] > 0)]

    # There cannot be zero length vectors in one of frequency, recency or T
    #summary_with_value_and_returns =
    #print summary_with_value_and_returns[
    #    (len(summary_with_value_and_returns['recency'])>0) &
    #    (len(summary_with_value_and_returns['frequency'])>0) &
    #    (len(summary_with_value_and_returns['T'])>0)
    #]

    if any(
            len(x) == 0 for x in [
                summary_with_value_and_returns['recency'],
                summary_with_value_and_returns['frequency'],
                summary_with_value_and_returns['T']
            ]):
        logger.debug(data['client_id'])

    # Setting up Gamma Gamma model
    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary_with_value_and_returns['frequency'],
            summary_with_value_and_returns['monetary_value'])

    # Output the average profit per transaction by client ID
    ggf_output = ggf.conditional_expected_average_profit(
        summary_with_value_and_returns['frequency'],
        summary_with_value_and_returns['monetary_value'])

    # Refit the BG/NBD model with the same data if none of frequency, recency or T is a zero-length vector
    if not any(len(x) == 0 for x in [
            summary_with_value_and_returns['recency'],
            summary_with_value_and_returns['frequency'],
            summary_with_value_and_returns['T']
    ]):
        bgf.fit(summary_with_value_and_returns['frequency'],
                summary_with_value_and_returns['recency'],
                summary_with_value_and_returns['T'])

    # Getting Customer lifetime value using the Gamma Gamma output
    # NOTE: the time can be adjusted, but is currently set to 12 months

    customer_predicted_value = ggf.customer_lifetime_value(
        bgf,  #the model to use to predict the number of future transactions
        summary_with_value_and_returns['frequency'],
        summary_with_value_and_returns['recency'],
        summary_with_value_and_returns['T'],
        summary_with_value_and_returns['monetary_value'],
        time=clv_prediction_time,  # months
        discount_rate=0.01  # monthly discount rate ~ 12.7% annually
    )

    # Converting to dataframe
    df_cpv = pd.DataFrame({
        'client_id': customer_predicted_value.index,
        'pred_values': customer_predicted_value.values
    })

    # Setting client_id as index
    df_cpv = df_cpv.set_index('client_id')

    # Merge with original summary
    df_merged = pd.merge(summary,
                         df_cpv,
                         left_index=True,
                         right_index=True,
                         how='outer')

    # Historical CLV
    data_hist = data.groupby(['client_id'])[['Searches', 'Revenue']].apply(
        lambda x: x.astype(float).sum())

    # Merge with original summary
    df_final = pd.merge(df_merged,
                        data_hist,
                        left_index=True,
                        right_index=True,
                        how='outer')

    # Prevent NaN in the pred_values column
    df_final.loc[df_final.frequency == 0, 'pred_values'] = 0.0

    # Create column that combines historical and predicted customer value
    df_final['total_clv'] = df_final['pred_values'] + df_final['Revenue']

    # Create column which calculates in days the number of days since they were last active
    df_final['last_active'] = df_final['T'] - df_final['recency']

    # Create a column which labels users inactive over 14 days as "Expired" ELSE "Active"
    df_final['user_status'] = np.where(df_final['last_active'] > 14, 'Expired',
                                       'Active')

    # Add column with date of calculation
    # Set calc_date to max submission date
    df_final['calc_date'] = max_date.date()  #pd.Timestamp('today').date()

    # Rename columns as appropriate
    df_final.columns = [
        'frequency', 'recency', 'customer_age', 'avg_session_value',
        'predicted_searches_14_days', 'alive_probability',
        'predicted_clv_12_months', 'historical_searches', 'historical_clv',
        'total_clv', 'days_since_last_active', 'user_status', 'calc_date'
    ]

    # Prevent non-returning customers from having 100% alive probability
    df_final.loc[df_final.frequency == 0, 'alive_probability'] = 0.0

    return df_final
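# --- Usage sketch (not part of the original snippet) ------------------------
# generate_clv_table() expects an activity-stream dataframe with 'client_id',
# 'activity_date', 'Searches' and 'Revenue' columns; a hypothetical call with
# a 12-month prediction window and no penalizer:
#
# clv_table = generate_clv_table(activity_df, clv_prediction_time=12, model_penalizer=0)
# clv_table[clv_table['user_status'] == 'Active'].sort_values('total_clv', ascending=False).head()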
Example #23
def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)
    rfm = dataframe.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (today_date - date.max()).days,
                                                                lambda date: (today_date - date.min()).days],
                                                'Invoice': lambda num: num.nunique(),
                                                'TotalPrice': lambda TotalPrice: TotalPrice.sum()})

    rfm.columns = rfm.columns.droplevel(0)

    rfm.columns = ['recency', 'T', 'frequency', 'monetary']

    # CALCULATION OF MONETARY AVG & ADDING RFM INTO DF
    temp_df = dataframe.groupby(["Customer ID", "Invoice"]).agg({"TotalPrice": ["mean"]})
    temp_df = temp_df.reset_index()
    temp_df.columns = temp_df.columns.droplevel(0)
    temp_df.columns = ["Customer ID", "Invoice", "total_price_mean"]
    temp_df2 = temp_df.groupby(["Customer ID"], as_index=False).agg({"total_price_mean": ["mean"]})
    temp_df2.columns = temp_df2.columns.droplevel(0)
    temp_df2.columns = ["Customer ID", "monetary_avg"]

    rfm = rfm.merge(temp_df2, how="left", on="Customer ID")
    rfm.set_index("Customer ID", inplace=True)
    rfm.index = rfm.index.astype(int)

    # CALCULATION OF WEEKLY RECENCY AND WEEKLY T FOR BGNBD
    rfm["recency_weekly"] = rfm["recency"] / 7
    rfm["T_weekly"] = rfm["T"] / 7

    # CONTROL
    rfm = rfm[rfm["monetary_avg"] > 0]
    rfm["frequency"] = rfm["frequency"].astype(int)

    # BGNBD
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(rfm['frequency'],
            rfm['recency_weekly'],
            rfm['T_weekly'])

    # exp_sales_1_month
    rfm["exp_sales_1_month"] = bgf.predict(4,
                                           rfm['frequency'],
                                           rfm['recency_weekly'],
                                           rfm['T_weekly'])
    # exp_sales_3_month
    rfm["exp_sales_3_month"] = bgf.predict(12,
                                           rfm['frequency'],
                                           rfm['recency_weekly'],
                                           rfm['T_weekly'])

    # expected_average_profit
    ggf = GammaGammaFitter(penalizer_coef=0.001)
    ggf.fit(rfm['frequency'], rfm['monetary_avg'])
    rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                             rfm['monetary_avg'])
    # 6 MONTHS cltv_p
    cltv = ggf.customer_lifetime_value(bgf,
                                       rfm['frequency'],
                                       rfm['recency_weekly'],
                                       rfm['T_weekly'],
                                       rfm['monetary_avg'],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)

    rfm["cltv_p"] = cltv

    # minmaxscaler
    scaler = MinMaxScaler(feature_range=(1, 100))
    scaler.fit(rfm[["cltv_p"]])
    rfm["cltv_p"] = scaler.transform(rfm[["cltv_p"]])

    # cltv_p_segment
    rfm["cltv_p_segment"] = pd.qcut(rfm["cltv_p"], 3, labels=["C", "B", "A"])

    rfm = rfm[["monetary_avg", "T", "recency_weekly", "T_weekly",
               "exp_sales_1_month", "exp_sales_3_month", "expected_average_profit",
               "cltv_p", "cltv_p_segment"]]

    return rfm