from lifetimes import BetaGeoFitter


def probability_alive(historical_rfm_data):
    """
    Predicted conditional probability that a customer is still alive.

    Parameters
    ----------
    historical_rfm_data : dict-like
        Historical frequency, recency, and T of an individual customer.

    Returns
    -------
    float
        Conditional probability alive.
    """
    clv_model = BetaGeoFitter(penalizer_coef=0.0)
    clv_model.load_model(path="models/customer_lifetime_estimator.pkl")

    alive_probability = clv_model.conditional_probability_alive(
        frequency=historical_rfm_data["frequency"],
        recency=historical_rfm_data["recency"],
        T=historical_rfm_data["T"])
    return alive_probability
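
A minimal usage sketch for the helper above; the RFM values below are made up for illustration, and the call assumes the pickled model exists at models/customer_lifetime_estimator.pkl.

# Hypothetical example inputs (not from the original snippet)
customer_rfm = {"frequency": 4, "recency": 30.4, "T": 52.0}
p_alive = probability_alive(customer_rfm)
print("Probability the customer is still alive: %.2f" % p_alive)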
Example #2
File: tools.py    Project: dgmiller/portfolio
class CLV(object):
    """
    INPUT
        pmg_num (int) the product market group number, default = 1
        outfile1 (str) the filename indicating where to store the raw data before analysis, default = '../data/clvtrainingset01.csv'
        outfile2 (str) the filename containing the results, default = '../data/clv01.csv'
        date_range (list) the start date and end date of the years to analyze, default = ['2008-09-01','2016-09-01']
    attributes other than those listed above
        self.data (DataFrame) a pandas DataFrame object of the data to be used for analysis
        self.bgf (from lifetimes) a statistical model object from the lifetimes package
        self.ggf (from lifetimes) a statistical model object from the lifetimes package
        self.results (DataFrame) a pandas DataFrame object of the results of analysis
    """
    def __init__(self,pmg_num=1,outfile1='../data/clvtrainingset01.csv',outfile2='../data/clv01.csv',date_range=['2008-09-01','2016-09-01']):
        self.pmg_num = pmg_num
        # outfile1 stores a clean version of the raw data used for analysis; this is important for reproducibility
        self.outfile1 = outfile1
        # outfile2 stores the clv estimation results
        self.outfile2 = outfile2
        self.date_range = date_range
        self.data = None
        self.bgf = None
        self.ggf = None
        self.results = None

    def get_data_from_server(self,cmd=None):
        """
        Gets data from sales_db and stores the query results in self.data
        INPUT
            cmd (str) the sql query to run; if None, a default query is used

            Note: the default query has been redacted from this snippet; the original was an 8-line SELECT statement.
        """
        # server name
        dsn = "THE SERVER NAME"
        cnxn_name = "DSN=%s" % dsn
        connection = odbc.connect(cnxn_name) # open a connection to the database
        c = connection.cursor() # create a cursor object
        
        # Grab transaction data from Postgres
        if not cmd:
            # placeholder for the redacted default query; it is formatted with the
            # product market group number and the start and end dates
            cmd = """SQL DEFAULT COMMAND GOES HERE""" % (self.pmg_num,self.date_range[0],self.date_range[1])
        
        c.execute(cmd) # execute the sql command
        
        # list to store the query data
        transaction_data = []
        
        # dictionary mapping customer ids to names
        to_name = dict(np.genfromtxt('../data/names.csv',dtype=str,delimiter='\t'))
        
        for row in c:
            cust, rsv_date, sales = row # unpack one row of the query result
            cust_id = str(int(cust))
            name = to_name[cust_id]
            # keep only active customers (use() is a helper defined elsewhere in this module)
            if use(name):
                rsv_date_readable = rsv_date.strftime('%Y-%m-%d') # format the transaction date
                sales_float = float(sales) # transaction amount as a float
                transaction_data.append({"id":cust, "date":rsv_date_readable, "sales":sales_float}) # add one record to the list
        
        # convert to dataframe
        df = pd.DataFrame(transaction_data, columns=['id', 'date', 'sales'])
        # store results
        df.to_csv(self.outfile1,index=False)
        # IMPORTANT: use correct observation_period_end date
        self.data = summary_data_from_transaction_data(df, 'id', 'date', 'sales', observation_period_end=self.date_range[1], freq='M')

    def get_data_from_file(self,filename,**kwargs):
        """Loads transaction data from a csv file (kwargs are passed to pandas.read_csv) and stores the monthly RFM summary in self.data"""
        df = pd.read_csv(filename,**kwargs)
        self.data = summary_data_from_transaction_data(df, 'id', 'date', 'sales', observation_period_end=self.date_range[1], freq='M')

    def fit(self,months=96):
        """
        Computes CLV estimates for the next n months and stores results in self.results
        INPUT
            months (int) number of months to predict, default = 96 (8 years)
        """
        ### PREDICT NUMBER OF PURCHASES
        self.bgf = BetaGeoFitter() # see lifetimes module documentation for details
        self.bgf.fit(self.data['frequency'], self.data['recency'], self.data['T'])
        # expected number of purchases over the next `months` months (default 96 months = 8 years)
        self.data['predicted_purchases'] = self.bgf.conditional_expected_number_of_purchases_up_to_time(
                months,
                self.data['frequency'],
                self.data['recency'],
                self.data['T'])

        ### PREDICT FUTURE PURCHASE AMOUNT
        # the Gamma-Gamma model assumes positive monetary values, so fit it on returning customers only
        returning = self.data[self.data['frequency'] > 0]
        self.ggf = GammaGammaFitter(penalizer_coef = 0)
        self.ggf.fit(returning['frequency'], returning['monetary_value'])
        # expected average profit per future transaction
        self.data['predicted_trans_profit'] = self.ggf.conditional_expected_average_profit(
                frequency = self.data['frequency'],
                monetary_value = self.data['monetary_value'])
        
        ### ESTIMATE CLV
        self.data['clv_estimation'] = self.data['predicted_trans_profit'] * self.data['predicted_purchases']
        self.data['prob_alive'] = self.bgf.conditional_probability_alive(
                self.data['frequency'],
                self.data['recency'],
                self.data['T'])
        self.results = self.data.sort_values(by='clv_estimation',ascending=False)
        # store results
        self.results.to_csv(self.outfile2,index=False)

    def plot_matrices(self):
        """
        Plots three diagnostic charts (see the lifetimes documentation for details):
            probability alive matrix: the probability that a customer is still active,
                                        as a function of frequency and recency
            frequency/recency matrix: the expected number of future purchases,
                                        as a function of frequency and recency
            period transactions: predicted vs. actual counts of repeat transactions
                                        in the calibration period
        """
        plot_probability_alive_matrix(self.bgf,cmap='viridis')
        plot_frequency_recency_matrix(self.bgf,cmap='viridis')
        plot_period_transactions(self.bgf)
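
A hedged usage sketch for the CLV class above; the file path and read_csv arguments are illustrative assumptions, not part of the original project.

# Illustrative workflow (paths and kwargs are assumptions)
clv = CLV(pmg_num=1, date_range=['2008-09-01', '2016-09-01'])
clv.get_data_from_file('../data/clvtrainingset01.csv', parse_dates=['date'])
clv.fit(months=96)        # writes ranked CLV estimates to outfile2
clv.plot_matrices()       # diagnostic plots from the fitted BG/NBD model
print(clv.results.head()) # customers with the highest estimated CLV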
Example #3
File: Main.py    Project: colllz/CLV
from lifetimes import BetaGeoFitter
import matplotlib.pyplot as plt

# modeldata is an RFM summary DataFrame (frequency, recency, T) built earlier in the script
# penalizer_coef adds regularization to stabilize the fit; 0.0 applies no penalty
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(modeldata['frequency'], modeldata['recency'], modeldata['T'])
print(bgf)

# create the frequency/recency matrix
from lifetimes.plotting import plot_frequency_recency_matrix

plot_frequency_recency_matrix(bgf) # heatmap of expected future purchases by frequency and recency

from lifetimes.plotting import plot_probability_alive_matrix

# probability that each customer is still alive (1 minus this value is the churn probability)
modeldata['probability_alive'] = bgf.conditional_probability_alive(modeldata['frequency'], modeldata['recency'], modeldata['T'])

# heatmap of the probability that each customer is still alive
fig = plt.figure(figsize=(12,8))
plot_probability_alive_matrix(bgf)


# predict the number of purchases each customer will make
t = 30  # prediction horizon, in the same time units as recency and T (here, 30 days)
modeldata['predicted_num_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(t, modeldata['frequency'], modeldata['recency'], modeldata['T'])
print(modeldata.sort_values(by='predicted_num_purchases').tail(5)) # five customers with the most predicted purchases


from lifetimes.plotting import plot_period_transactions
# validation plot: compares actual vs. model-predicted counts of repeat transactions
plot_period_transactions(bgf)
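
The fitted model can also be persisted for reuse, for example by a loader like probability_alive() in Example #1; a minimal sketch, with the path chosen to mirror Example #1 (an assumption, not part of the original script).

# Persist the fitted BG/NBD model so it can be reloaded later with load_model()
# (the path is an assumption that mirrors Example #1)
bgf.save_model("models/customer_lifetime_estimator.pkl")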
Example #4
############################### Customer Lifetime Value Calculation #############################
# refit the BG/NBD model on the returning-customers summary; this model predicts the number of future transactions
from lifetimes import BetaGeoFitter
import pandas as pd

bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['recency'],
        returning_customers_summary['T'])
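
# ggf is used below but is not defined in this excerpt; a minimal sketch of how it is
# typically fitted with lifetimes (assumption: returning_customers_summary contains
# only customers with frequency > 0 and positive monetary_value)
from lifetimes import GammaGammaFitter
ggf = GammaGammaFitter(penalizer_coef=0.0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])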

# 12-month CLV per customer; freq='D' must match the time units used to build the summary (recency and T)
CLV_1Year = ggf.customer_lifetime_value(
    bgf,
    returning_customers_summary['frequency'],
    returning_customers_summary['recency'],
    returning_customers_summary['T'],
    returning_customers_summary['monetary_value'],
    time=12,
    freq='D')
CLV_1Year = pd.Series(CLV_1Year)

################# Churn Probability ###############################
# the model predicts the probability that each customer is still alive; 1 minus that value is the probability the customer has churned
alive = bgf.conditional_probability_alive(
    returning_customers_summary['frequency'],
    returning_customers_summary['recency'], returning_customers_summary['T'])

################ Final Output ###############################
returning_customers_summary2 = returning_customers_summary.copy()
returning_customers_summary2['Churn_Probability'] = 1 - alive
# AVG_Profit is assumed to be computed earlier in the script (e.g. the Gamma-Gamma expected average profit per transaction)
returning_customers_summary2['AVG_SALE'] = AVG_Profit
returning_customers_summary2['CLV_1Year'] = CLV_1Year
returning_customers_summary2.to_csv('output.csv')