def test_using_weights_col_gives_correct_results(
        self, cdnow_customers_with_monetary_value):
    """Weighted fit on de-duplicated rows must match the plain fit.

    Rows sharing the same (frequency, monetary_value) pair are collapsed
    into one row whose weight is the number of merged rows. Fitting on the
    collapsed frame with ``weights=`` has to give the same p, q, v (to
    3 decimals) as fitting on the uncollapsed frame without weights.
    """
    # Keep only the customers with frequency > 0.
    repeat_buyers = cdnow_customers_with_monetary_value[
        cdnow_customers_with_monetary_value["frequency"] > 0]

    # Give every row a unit weight, then sum the weights per distinct
    # (frequency, monetary_value) pair.
    collapsed = (
        repeat_buyers.copy()
        .assign(weights=1.0)
        .groupby(["frequency", "monetary_value"])["weights"]
        .sum()
        .reset_index()
    )
    # The comparison is only meaningful if at least one pair was merged.
    assert (collapsed["weights"] > 1).any()

    weighted_fitter = lt.GammaGammaFitter(penalizer_coef=0.0)
    weighted_fitter.fit(
        collapsed["frequency"],
        collapsed["monetary_value"],
        weights=collapsed["weights"],
    )

    unweighted_fitter = lt.GammaGammaFitter(penalizer_coef=0.0)
    unweighted_fitter.fit(
        repeat_buyers["frequency"],
        repeat_buyers["monetary_value"],
    )

    param_names = ("p", "q", "v")
    unweighted_params = np.array(unweighted_fitter._unload_params(*param_names))
    weighted_params = np.array(weighted_fitter._unload_params(*param_names))
    npt.assert_almost_equal(unweighted_params, weighted_params, decimal=3)
def test_fit_with_index(self, cdnow_customers_with_monetary_value):
    """``fit(index=...)`` must be used as the index of the fitter's data.

    With an explicit index the fitted ``data`` frame carries exactly that
    index; with ``index=None`` it must not coincide with the custom one.
    """
    repeat_buyers = cdnow_customers_with_monetary_value[
        cdnow_customers_with_monetary_value["frequency"] > 0]
    frequency = repeat_buyers["frequency"]
    monetary_value = repeat_buyers["monetary_value"]

    # A descending range: n, n-1, ..., 1.
    custom_index = range(len(repeat_buyers), 0, -1)

    indexed_fitter = lt.GammaGammaFitter()
    indexed_fitter.fit(frequency, monetary_value, index=custom_index)
    assert (indexed_fitter.data.index == custom_index).all()

    default_fitter = lt.GammaGammaFitter()
    default_fitter.fit(frequency, monetary_value, index=None)
    assert not (default_fitter.data.index == custom_index).all()
def test_customer_lifetime_value_with_bgf(
        self, cdnow_customers_with_monetary_value):
    """``GammaGammaFitter.customer_lifetime_value`` must agree with utils.

    The method result is compared against a direct call to
    ``utils._customer_lifetime_value`` fed with the fitter's own
    conditional expected average profit, once with the default arguments
    and once with ``freq="H"``.
    """
    customers = cdnow_customers_with_monetary_value
    frequency = customers["frequency"]
    recency = customers["recency"]
    age = customers["T"]
    monetary_value = customers["monetary_value"]

    # Gamma-Gamma parameters are set by hand instead of being fitted.
    ggf = lt.GammaGammaFitter()
    ggf.params_ = pd.Series({"p": 6.25, "q": 3.74, "v": 15.44})

    bgf = lt.BetaGeoFitter()
    bgf.fit(frequency, recency, age)

    for extra_kwargs in ({}, {"freq": "H"}):
        ggf_clv = ggf.customer_lifetime_value(
            bgf, frequency, recency, age, monetary_value, **extra_kwargs)
        utils_clv = utils._customer_lifetime_value(
            bgf,
            frequency,
            recency,
            age,
            ggf.conditional_expected_average_profit(frequency, monetary_value),
            **extra_kwargs,
        )
        npt.assert_equal(ggf_clv.values, utils_clv.values)
def test_params_out_is_close_to_Hardie_paper(
        self, cdnow_customers_with_monetary_value):
    """Fitted p, q, v must match the reference values to 2 decimals."""
    repeat_buyers = cdnow_customers_with_monetary_value[
        cdnow_customers_with_monetary_value["frequency"] > 0]

    ggf = lt.GammaGammaFitter()
    ggf.fit(repeat_buyers["frequency"], repeat_buyers["monetary_value"])

    # Reference values for (p, q, v).
    expected = np.array([6.25, 3.74, 15.44])
    fitted = np.array(ggf._unload_params("p", "q", "v"))
    npt.assert_array_almost_equal(expected, fitted, decimal=2)
def test_conditional_expected_average_profit(
        self, cdnow_customers_with_monetary_value):
    """Expected average profit of the first 10 customers matches reference.

    The Gamma-Gamma parameters are set by hand (no fitting) and the
    estimates are compared with an absolute tolerance of 0.1.
    """
    ggf = lt.GammaGammaFitter()
    ggf.params_ = pd.Series({"p": 6.25, "q": 3.74, "v": 15.44})

    first_ten = cdnow_customers_with_monetary_value.head(10)
    estimates = ggf.conditional_expected_average_profit(
        first_ten["frequency"], first_ten["monetary_value"])

    # from Hardie spreadsheet http://brucehardie.com/notes/025/
    expected = np.array([
        24.65, 18.91, 35.17, 35.17, 35.17,
        71.46, 18.91, 35.17, 27.28, 35.17,
    ])
    npt.assert_allclose(estimates.values, expected, atol=0.1)
The distribution of average transaction values across customers is independent of the transaction process. ''' # We are considering only customers who made repeat purchases with the business i.e., frequency > 0. Because, if frequency is 0, it means that they are one time customer and are considered already dead. # final assumption (no relationship between frequency and monetary value of transactions) can be validated using Pearson correlation. # Checking the relationship between frequency and monetary_value # return_customers_summary = summary[summary['frequency']>0] return_customers_summary = summary[summary['frequency']>0][summary['monetary_value']>0] # added additional filter to exclude transactions with <=0 monetary_value print(return_customers_summary.shape) return_customers_summary.head() # Checking the relationship between frequency and monetary_value return_customers_summary[['frequency', 'monetary_value']].corr() # Modeling the monetary value using Gamma-Gamma Model ggf = lifetimes.GammaGammaFitter(penalizer_coef=0.001) ggf.fit(return_customers_summary['frequency'], return_customers_summary['monetary_value']) # Summary of the fitted parameters ggf.summary # predict using the model # predict the expected average profit for each each transaction and Customer Lifetime Value using the model # Calculating the conditional expected average profit for each customer per transaction # model.conditional_expected_average_profit(): This method computes the conditional expectation of the average profit per transaction for a group of one or more customers summary = summary[summary['monetary_value'] >0] summary['exp_avg_sales'] = ggf.conditional_expected_average_profit(summary['frequency'], summary['monetary_value'])
def load_data(data, day=t_days, profit=profit_m):
    """Score a customer summary CSV, cluster it and render the result.

    The CSV is read, a Pareto/NBD model and a Gamma-Gamma model are fitted
    on it, per-customer predictions are added as new columns, the customers
    are grouped into four KMeans clusters, and the resulting table, a
    cluster-count bar chart and a CSV download link are written to the
    Streamlit page.

    Args:
        data: Anything accepted by ``pandas.read_csv``. After the first
            column is dropped the frame must contain the columns
            "frequency", "recency", "T" and "monetary_value".
        day: Horizon ``t`` passed to
            ``conditional_expected_number_of_purchases_up_to_time``.
        profit: Factor multiplied with the predicted CLV to obtain the
            "profit_margin" column.

    Returns:
        None. All output goes to the Streamlit page.
    """
    input_data = pd.read_csv(data)
    # The first column is discarded (presumably a saved index/id column --
    # TODO confirm against the expected file layout).
    input_data = pd.DataFrame(input_data.iloc[:, 1:])

    # Pareto/NBD model
    pareto_model = lifetimes.ParetoNBDFitter(penalizer_coef=0.1)
    pareto_model.fit(input_data["frequency"], input_data["recency"],
                     input_data["T"])
    # Computed once and reused for both columns.
    p_alive = pareto_model.conditional_probability_alive(
        input_data["frequency"], input_data["recency"], input_data["T"])
    input_data["p_not_alive"] = 1 - p_alive
    input_data["p_alive"] = p_alive
    # Use the `day` argument as the horizon so callers can actually
    # override it; relying on an outer `days` name made the parameter dead.
    input_data["predicted_purchases"] = (
        pareto_model.conditional_expected_number_of_purchases_up_to_time(
            day, input_data["frequency"], input_data["recency"],
            input_data["T"]))

    # Gamma-Gamma model: rows with frequency <= 0 or monetary_value <= 0
    # are removed before fitting.
    excluded = ((input_data["frequency"] <= 0.0)
                | (input_data["monetary_value"] <= 0.0))
    input_data = input_data[~excluded].copy()

    ggf_model = lifetimes.GammaGammaFitter(penalizer_coef=0.1)
    ggf_model.fit(input_data["frequency"], input_data["monetary_value"])
    input_data["expected_avg_sales_"] = (
        ggf_model.conditional_expected_average_profit(
            input_data["frequency"], input_data["monetary_value"]))
    input_data["predicted_clv"] = ggf_model.customer_lifetime_value(
        pareto_model,
        input_data["frequency"],
        input_data["recency"],
        input_data["T"],
        input_data["monetary_value"],
        time=30,
        freq='D',
        discount_rate=0.01)
    input_data["profit_margin"] = input_data["predicted_clv"] * profit
    # Renumber the rows 0..n-1 so the positional KMeans labels line up.
    input_data = input_data.reset_index(drop=True)

    # K-Means model
    feature_columns = [
        "predicted_purchases", "expected_avg_sales_", "predicted_clv",
        "profit_margin"
    ]
    # `n_jobs` is not passed: it was removed from KMeans in scikit-learn 1.0
    # and passing it raises TypeError there.
    k_model = KMeans(n_clusters=4, init="k-means++",
                     max_iter=1000).fit(input_data[feature_columns])
    input_data["Labels"] = k_model.labels_

    st.write(input_data)

    # Bar chart with the number of customers per cluster, annotated with
    # the counts.
    fig = alt.Chart(input_data).mark_bar().encode(y="Labels:N",
                                                  x="count(Labels):Q")
    text = fig.mark_text(align="left", baseline="middle",
                         dx=3).encode(text="count(Labels):Q")
    chart = (fig + text)
    st.altair_chart(chart, use_container_width=True)

    # Download section
    st.markdown("\n### Download Your File Now!!!\n")
    text = (
        "There is currently no official way of downloading data from "
        "Streamlit as if now. So Please download the data from the below "
        "link using **\"Save As\"**."
    )
    st.markdown(text)

    # When no file name is given, pandas returns the CSV as a string.
    csv = input_data.to_csv(index=False)
    # Data URIs carry bytes as base64 text: str -> bytes -> base64 -> str.
    b64 = base64.b64encode(csv.encode()).decode()
    # The `download` attribute makes a plain click save the file under a
    # sensible name; the angle brackets are escaped because the markup is
    # rendered as HTML and "<some_name>" would otherwise vanish as a tag.
    href = (
        f'<a href="data:text/csv;base64,{b64}" '
        'download="customer_lifetime_prediction_result.csv">'
        'Download CSV File</a> '
        '(right-click and save as &lt;some_name&gt;.csv)'
    )
    st.markdown(href, unsafe_allow_html=True)
def load_data(data, day=t_days, profit=profit_m):
    """Score a customer summary CSV, cluster it and render the result.

    The CSV is read, a Pareto/NBD model and a Gamma-Gamma model are fitted
    on it, per-customer predictions are added as new columns, the customers
    are grouped into four KMeans clusters named "Low" .. "V_High", and the
    resulting table and a cluster-count bar chart are written to the
    Streamlit page. Pressing the "Download" button saves the table to
    ``customer_lifetime_prediction_result.csv`` in the working directory of
    the running app.

    Args:
        data: Anything accepted by ``pandas.read_csv``. After the first
            column is dropped the frame must contain the columns
            "frequency", "recency", "T" and "monetary_value".
        day: Horizon ``t`` passed to
            ``conditional_expected_number_of_purchases_up_to_time``.
        profit: Factor multiplied with the predicted CLV to obtain the
            "profit_margin" column.

    Returns:
        None.
    """
    input_data = pd.read_csv(data)
    # The first column is discarded (presumably a saved index/id column --
    # TODO confirm against the expected file layout).
    input_data = pd.DataFrame(input_data.iloc[:, 1:])

    # Pareto/NBD model
    pareto_model = lifetimes.ParetoNBDFitter(penalizer_coef=0.1)
    pareto_model.fit(input_data["frequency"], input_data["recency"],
                     input_data["T"])
    # Computed once and reused for both columns.
    p_alive = pareto_model.conditional_probability_alive(
        input_data["frequency"], input_data["recency"], input_data["T"])
    input_data["p_not_alive"] = 1 - p_alive
    input_data["p_alive"] = p_alive
    # Use the `day` argument as the horizon so callers can actually
    # override it; relying on an outer `days` name made the parameter dead.
    input_data["predicted_purchases"] = (
        pareto_model.conditional_expected_number_of_purchases_up_to_time(
            day, input_data["frequency"], input_data["recency"],
            input_data["T"]))

    # Gamma-Gamma model: rows with frequency <= 0 or monetary_value <= 0
    # are removed before fitting.
    excluded = ((input_data["frequency"] <= 0.0)
                | (input_data["monetary_value"] <= 0.0))
    input_data = input_data[~excluded].copy()

    ggf_model = lifetimes.GammaGammaFitter(penalizer_coef=0.1)
    ggf_model.fit(input_data["frequency"], input_data["monetary_value"])
    input_data["expected_avg_sales_"] = (
        ggf_model.conditional_expected_average_profit(
            input_data["frequency"], input_data["monetary_value"]))
    input_data["predicted_clv"] = ggf_model.customer_lifetime_value(
        pareto_model,
        input_data["frequency"],
        input_data["recency"],
        input_data["T"],
        input_data["monetary_value"],
        time=30,
        freq='D',
        discount_rate=0.01)
    input_data["profit_margin"] = input_data["predicted_clv"] * profit
    # Renumber the rows 0..n-1 so the positional KMeans labels line up.
    input_data = input_data.reset_index(drop=True)

    # K-Means model
    feature_columns = [
        "predicted_purchases", "expected_avg_sales_", "predicted_clv",
        "profit_margin"
    ]
    # `n_jobs` is not passed: it was removed from KMeans in scikit-learn 1.0
    # and passing it raises TypeError there.
    k_model = KMeans(n_clusters=4, init="k-means++",
                     max_iter=1000).fit(input_data[feature_columns])
    input_data["Labels"] = k_model.labels_

    # KMeans numbers its clusters arbitrarily, so a fixed id -> name table
    # can call the most valuable cluster "Low" on the next run. The tier
    # names are therefore assigned by ranking the clusters on their mean
    # predicted CLV, lowest first.
    # NOTE(review): ranking by "predicted_clv" is an assumption about what
    # the tier names are meant to express -- confirm with the app owner.
    tier_names = ["Low", "Medium", "High", "V_High"]
    clusters_by_value = (
        input_data.groupby("Labels")["predicted_clv"].mean().sort_values())
    label_mapper = dict(zip(clusters_by_value.index, tier_names))
    input_data["Labels"] = input_data["Labels"].map(label_mapper)

    st.write(input_data)

    # Bar chart with the number of customers per tier, annotated with the
    # counts.
    fig = alt.Chart(input_data).mark_bar().encode(y="Labels:N",
                                                  x="count(Labels):Q")
    text = fig.mark_text(align="left", baseline="middle",
                         dx=3).encode(text="count(Labels):Q")
    chart = (fig + text)
    st.altair_chart(chart, use_container_width=True)

    # Download button: write the file first so success is only reported
    # once the CSV actually exists.
    if st.button("Download"):
        input_data.to_csv("customer_lifetime_prediction_result.csv")
        st.write(
            "Successfully Downloaded!!! Please Check Your Default Download Location...:smile:"
        )
    return None